From: Stanislav Mekhanoshin
Date: Mon, 11 Sep 2017 17:13:57 +0000 (+0000)
Subject: [AMDGPU] Produce madak and madmk from the two-address pass
X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=46582be9742d931368f9455a768df95d9525ccf5;p=llvm

[AMDGPU] Produce madak and madmk from the two-address pass

These two instructions are normally selected, but when the two address
pass converts mac into mad we end up with the mad where we could have one
of these.

Differential Revision: https://reviews.llvm.org/D37389

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@312928 91177308-0d34-0410-b5e6-96231b3b80d8
---

diff --git a/lib/Target/AMDGPU/SIInstrInfo.cpp b/lib/Target/AMDGPU/SIInstrInfo.cpp
index f7f6d52e751..e9360701979 100644
--- a/lib/Target/AMDGPU/SIInstrInfo.cpp
+++ b/lib/Target/AMDGPU/SIInstrInfo.cpp
@@ -2083,6 +2083,29 @@ bool SIInstrInfo::areMemAccessesTriviallyDisjoint(MachineInstr &MIa,
   return false;
 }
 
+/// Returns the immediate moved into register operand \p MO by its defining
+/// instruction, or 0 when there is nothing to fold.
+///
+/// The operand qualifies only if it is a register whose unique def (as
+/// reported by MachineRegisterInfo::getUniqueVRegDef) is an S_MOV_B32 or a
+/// V_MOV_B32_e32 whose source operand is an immediate. Callers test the
+/// result for truthiness, so a move of literal 0 is indistinguishable from
+/// "not foldable" and is never folded.
+static int64_t getFoldableImm(const MachineOperand* MO) {
+  if (!MO->isReg())
+    return 0; // Not a register, so there is no defining move to look through.
+  const MachineFunction *MF = MO->getParent()->getParent()->getParent();
+  const MachineRegisterInfo &MRI = MF->getRegInfo();
+  auto Def = MRI.getUniqueVRegDef(MO->getReg());
+  if (Def && (Def->getOpcode() == AMDGPU::S_MOV_B32 ||
+              Def->getOpcode() == AMDGPU::V_MOV_B32_e32) &&
+      Def->getOperand(1).isImm())
+    return Def->getOperand(1).getImm();
+  // 0 is the "no foldable immediate" sentinel. The return type is int64_t, so
+  // spell it as an integer rather than as `false` or AMDGPU::NoRegister.
+  return 0;
+}
+
 MachineInstr *SIInstrInfo::convertToThreeAddress(MachineFunction::iterator &MBB,
                                                  MachineInstr &MI,
                                                  LiveVariables *LV) const {
@@ -2120,6 +2143,35 @@ MachineInstr *SIInstrInfo::convertToThreeAddress(MachineFunction::iterator &MBB,
   const MachineOperand *Clamp = getNamedOperand(MI, AMDGPU::OpName::clamp);
   const MachineOperand *Omod = getNamedOperand(MI, AMDGPU::OpName::omod);
 
+  if (!Src0Mods && !Src1Mods && !Clamp && !Omod) {
+    if (auto Imm = getFoldableImm(Src2)) {
+      return BuildMI(*MBB, MI, MI.getDebugLoc(),
+                     get(IsF16 ?
AMDGPU::V_MADAK_F16 : AMDGPU::V_MADAK_F32)) + .add(*Dst) + .add(*Src0) + .add(*Src1) + .addImm(Imm); + } + if (auto Imm = getFoldableImm(Src1)) { + return BuildMI(*MBB, MI, MI.getDebugLoc(), + get(IsF16 ? AMDGPU::V_MADMK_F16 : AMDGPU::V_MADMK_F32)) + .add(*Dst) + .add(*Src0) + .addImm(Imm) + .add(*Src2); + } + if (auto Imm = getFoldableImm(Src0)) { + if (isOperandLegal(MI, AMDGPU::getNamedOperandIdx(AMDGPU::V_MADMK_F32, + AMDGPU::OpName::src0), Src1)) + return BuildMI(*MBB, MI, MI.getDebugLoc(), + get(IsF16 ? AMDGPU::V_MADMK_F16 : AMDGPU::V_MADMK_F32)) + .add(*Dst) + .add(*Src1) + .addImm(Imm) + .add(*Src2); + } + } + return BuildMI(*MBB, MI, MI.getDebugLoc(), get(IsF16 ? AMDGPU::V_MAD_F16 : AMDGPU::V_MAD_F32)) .add(*Dst) diff --git a/test/CodeGen/AMDGPU/madak.ll b/test/CodeGen/AMDGPU/madak.ll index 77c35fac8b5..cc083339338 100644 --- a/test/CodeGen/AMDGPU/madak.ll +++ b/test/CodeGen/AMDGPU/madak.ll @@ -34,7 +34,7 @@ define amdgpu_kernel void @madak_f32(float addrspace(1)* noalias %out, float add ; GCN-DAG: buffer_load_dword [[VB:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4 ; GCN-DAG: buffer_load_dword [[VC:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:8 ; GCN-DAG: v_mov_b32_e32 [[VK:v[0-9]+]], 0x41200000 -; GCN-DAG: v_mad_f32 {{v[0-9]+}}, [[VA]], [[VB]], [[VK]] +; GCN-DAG: v_madak_f32 {{v[0-9]+}}, [[VA]], [[VB]], 0x41200000 ; GCN-DAG: v_mac_f32_e32 [[VK]], [[VA]], [[VC]] ; GCN: s_endpgm define amdgpu_kernel void @madak_2_use_f32(float addrspace(1)* noalias %out, float addrspace(1)* noalias %in) nounwind { diff --git a/test/CodeGen/AMDGPU/twoaddr-mad.mir b/test/CodeGen/AMDGPU/twoaddr-mad.mir new file mode 100644 index 00000000000..ebda1d26ddc --- /dev/null +++ b/test/CodeGen/AMDGPU/twoaddr-mad.mir @@ -0,0 +1,110 @@ +# RUN: llc -march=amdgcn %s -run-pass twoaddressinstruction -verify-machineinstrs -o - | FileCheck -check-prefix=GCN %s + +# GCN-LABEL: name: test_madmk_reg_imm_f32 +# GCN: V_MADMK_F32 killed 
%0.sub0, 1078523331, killed %1, implicit %exec +--- +name: test_madmk_reg_imm_f32 +registers: + - { id: 0, class: vreg_64 } + - { id: 1, class: vgpr_32 } + - { id: 2, class: vgpr_32 } + - { id: 3, class: vgpr_32 } +body: | + bb.0: + + %0 = IMPLICIT_DEF + %1 = COPY %0.sub1 + %2 = V_MOV_B32_e32 1078523331, implicit %exec + %3 = V_MAC_F32_e32 killed %0.sub0, %2, killed %1, implicit %exec + +... + +# GCN-LABEL: name: test_madmk_imm_reg_f32 +# GCN: V_MADMK_F32 killed %0.sub0, 1078523331, killed %1, implicit %exec +--- +name: test_madmk_imm_reg_f32 +registers: + - { id: 0, class: vreg_64 } + - { id: 1, class: vgpr_32 } + - { id: 2, class: vgpr_32 } + - { id: 3, class: vgpr_32 } +body: | + bb.0: + + %0 = IMPLICIT_DEF + %1 = COPY %0.sub1 + %2 = V_MOV_B32_e32 1078523331, implicit %exec + %3 = V_MAC_F32_e32 %2, killed %0.sub0, killed %1, implicit %exec + +... + +# GCN-LABEL: name: test_madak_f32 +# GCN: V_MADAK_F32 killed %0.sub0, %0.sub1, 1078523331, implicit %exec +--- +name: test_madak_f32 +registers: + - { id: 0, class: vreg_64 } + - { id: 1, class: vgpr_32 } + - { id: 2, class: vgpr_32 } +body: | + bb.0: + + %0 = IMPLICIT_DEF + %1 = V_MOV_B32_e32 1078523331, implicit %exec + %2 = V_MAC_F32_e32 killed %0.sub0, %0.sub1, %1, implicit %exec + +... + +# GCN-LABEL: name: test_madmk_reg_imm_f16 +# GCN: V_MADMK_F16 killed %0.sub0, 1078523331, killed %1, implicit %exec +--- +name: test_madmk_reg_imm_f16 +registers: + - { id: 0, class: vreg_64 } + - { id: 1, class: vgpr_32 } + - { id: 2, class: vgpr_32 } + - { id: 3, class: vgpr_32 } +body: | + bb.0: + + %0 = IMPLICIT_DEF + %1 = COPY %0.sub1 + %2 = V_MOV_B32_e32 1078523331, implicit %exec + %3 = V_MAC_F16_e32 killed %0.sub0, %2, killed %1, implicit %exec + +... 
+ +# GCN-LABEL: name: test_madmk_imm_reg_f16 +# GCN: V_MADMK_F16 killed %0.sub0, 1078523331, killed %1, implicit %exec +--- +name: test_madmk_imm_reg_f16 +registers: + - { id: 0, class: vreg_64 } + - { id: 1, class: vgpr_32 } + - { id: 2, class: vgpr_32 } + - { id: 3, class: vgpr_32 } +body: | + bb.0: + + %0 = IMPLICIT_DEF + %1 = COPY %0.sub1 + %2 = V_MOV_B32_e32 1078523331, implicit %exec + %3 = V_MAC_F16_e32 %2, killed %0.sub0, killed %1, implicit %exec + +... + +# GCN-LABEL: name: test_madak_f16 +# GCN: V_MADAK_F16 killed %0.sub0, %0.sub1, 1078523331, implicit %exec +--- +name: test_madak_f16 +registers: + - { id: 0, class: vreg_64 } + - { id: 1, class: vgpr_32 } + - { id: 2, class: vgpr_32 } +body: | + bb.0: + + %0 = IMPLICIT_DEF + %1 = V_MOV_B32_e32 1078523331, implicit %exec + %2 = V_MAC_F16_e32 killed %0.sub0, %0.sub1, %1, implicit %exec +... diff --git a/test/CodeGen/AMDGPU/v_madak_f16.ll b/test/CodeGen/AMDGPU/v_madak_f16.ll index 0148ff470b7..ab47cc99a21 100644 --- a/test/CodeGen/AMDGPU/v_madak_f16.ll +++ b/test/CodeGen/AMDGPU/v_madak_f16.ll @@ -23,9 +23,9 @@ entry: } ; GCN-LABEL: {{^}}madak_f16_use_2 -; SI: v_mad_f32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} +; SI: v_madak_f32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, 0x41200000 ; SI: v_mac_f32_e32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} -; VI: v_mad_f16 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} +; VI: v_madak_f16 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, 0x4900 ; VI: v_mac_f16_e32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} ; GCN: s_endpgm define amdgpu_kernel void @madak_f16_use_2(