From f4866eec228e14dd6fb632b8f1f6fcf53218fd9b Mon Sep 17 00:00:00 2001 From: Stanislav Mekhanoshin Date: Tue, 24 Jan 2017 17:46:17 +0000 Subject: [PATCH] [AMDGPU] Add VGPR copies post regalloc fix pass Regalloc creates COPY instructions which do not formally use VALU. That results in v_mov instructions displaced after exec mask modification. One pass which do it is SIOptimizeExecMasking, but potentially it can be done by other passes too. This patch adds a pass immediately after regalloc to add implicit exec use operand to all VGPR copy instructions. Differential Revision: https://reviews.llvm.org/D28874 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@292956 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/AMDGPU/AMDGPU.h | 3 + lib/Target/AMDGPU/AMDGPUTargetMachine.cpp | 2 + lib/Target/AMDGPU/CMakeLists.txt | 1 + lib/Target/AMDGPU/SIFixVGPRCopies.cpp | 72 +++++++++++++++++++++++ test/CodeGen/AMDGPU/fix-vgpr-copies.mir | 44 ++++++++++++++ 5 files changed, 122 insertions(+) create mode 100644 lib/Target/AMDGPU/SIFixVGPRCopies.cpp create mode 100644 test/CodeGen/AMDGPU/fix-vgpr-copies.mir diff --git a/lib/Target/AMDGPU/AMDGPU.h b/lib/Target/AMDGPU/AMDGPU.h index 7b0a7f4b605..78876ab4f9e 100644 --- a/lib/Target/AMDGPU/AMDGPU.h +++ b/lib/Target/AMDGPU/AMDGPU.h @@ -60,6 +60,9 @@ extern char &SIShrinkInstructionsID; void initializeSIFixSGPRCopiesPass(PassRegistry &); extern char &SIFixSGPRCopiesID; +void initializeSIFixVGPRCopiesPass(PassRegistry &); +extern char &SIFixVGPRCopiesID; + void initializeSILowerI1CopiesPass(PassRegistry &); extern char &SILowerI1CopiesID; diff --git a/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp b/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp index d8a0c716279..06adb372305 100644 --- a/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp +++ b/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp @@ -86,6 +86,7 @@ extern "C" void LLVMInitializeAMDGPUTarget() { PassRegistry *PR = PassRegistry::getPassRegistry(); initializeSILowerI1CopiesPass(*PR); initializeSIFixSGPRCopiesPass(*PR); + initializeSIFixVGPRCopiesPass(*PR); initializeSIFoldOperandsPass(*PR); initializeSIShrinkInstructionsPass(*PR); initializeSIFixControlFlowLiveIntervalsPass(*PR); @@ -615,6 +616,7 @@ void GCNPassConfig::addOptimizedRegAlloc(FunctionPass *RegAllocPass) { } void GCNPassConfig::addPostRegAlloc() { + addPass(&SIFixVGPRCopiesID); addPass(&SIOptimizeExecMaskingID); TargetPassConfig::addPostRegAlloc(); } diff --git a/lib/Target/AMDGPU/CMakeLists.txt b/lib/Target/AMDGPU/CMakeLists.txt index 02d441756c8..bbd06b19d63 100644 --- a/lib/Target/AMDGPU/CMakeLists.txt +++ b/lib/Target/AMDGPU/CMakeLists.txt @@ -68,6 +68,7 @@ add_llvm_target(AMDGPUCodeGen SIDebuggerInsertNops.cpp SIFixControlFlowLiveIntervals.cpp SIFixSGPRCopies.cpp + SIFixVGPRCopies.cpp SIFoldOperands.cpp SIFrameLowering.cpp SIInsertSkips.cpp diff --git a/lib/Target/AMDGPU/SIFixVGPRCopies.cpp b/lib/Target/AMDGPU/SIFixVGPRCopies.cpp new file mode 100644 index 00000000000..3d3121788b5 --- /dev/null +++ b/lib/Target/AMDGPU/SIFixVGPRCopies.cpp @@ -0,0 +1,72 @@ +//===-- SIFixVGPRCopies.cpp - Fix VGPR Copies after regalloc --------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +/// \file +/// \brief Add implicit use of exec to vector register copies. +/// +//===----------------------------------------------------------------------===// + +#include "AMDGPU.h" +#include "AMDGPUSubtarget.h" +#include "SIInstrInfo.h" +#include "llvm/CodeGen/MachineFunctionPass.h" + +using namespace llvm; + +#define DEBUG_TYPE "si-fix-vgpr-copies" + +namespace { + +class SIFixVGPRCopies : public MachineFunctionPass { +public: + static char ID; + +public: + SIFixVGPRCopies() : MachineFunctionPass(ID) { + initializeSIFixVGPRCopiesPass(*PassRegistry::getPassRegistry()); + } + + bool runOnMachineFunction(MachineFunction &MF) override; + + StringRef getPassName() const override { return "SI Fix VGPR copies"; } +}; + +} // End anonymous namespace. + +INITIALIZE_PASS(SIFixVGPRCopies, DEBUG_TYPE, "SI Fix VGPR copies", false, false) + +char SIFixVGPRCopies::ID = 0; + +char &llvm::SIFixVGPRCopiesID = SIFixVGPRCopies::ID; + +bool SIFixVGPRCopies::runOnMachineFunction(MachineFunction &MF) { + const SISubtarget &ST = MF.getSubtarget(); + const SIRegisterInfo *TRI = ST.getRegisterInfo(); + const SIInstrInfo *TII = ST.getInstrInfo(); + bool Changed = false; + + for (MachineBasicBlock &MBB : MF) { + for (MachineInstr &MI : MBB) { + switch (MI.getOpcode()) { + case AMDGPU::COPY: + if (TII->isVGPRCopy(MI) && !MI.readsRegister(AMDGPU::EXEC, TRI)) { + MI.addOperand(MF, + MachineOperand::CreateReg(AMDGPU::EXEC, false, true)); + DEBUG(dbgs() << "Add exec use to " << MI); + Changed = true; + } + break; + default: + break; + } + } + } + + return Changed; +} diff --git a/test/CodeGen/AMDGPU/fix-vgpr-copies.mir b/test/CodeGen/AMDGPU/fix-vgpr-copies.mir new file mode 100644 index 00000000000..4951e0df4d3 --- /dev/null +++ b/test/CodeGen/AMDGPU/fix-vgpr-copies.mir @@ -0,0 +1,44 @@ +# RUN: llc -march=amdgcn -start-after=greedy -stop-after=si-optimize-exec-masking -o - %s | FileCheck %s +# Check that we first do all vector instructions and only then change exec +# CHECK-DAG: COPY %vgpr10_vgpr11 +# CHECK-DAG: COPY %vgpr12_vgpr13 +# CHECK: %exec = COPY + +--- +name: main +alignment: 0 +exposesReturnsTwice: false +legalized: false +regBankSelected: false +selected: false +tracksRegLiveness: true +liveins: + - { reg: '%sgpr4_sgpr5' } + - { reg: '%sgpr6' } + - { reg: '%vgpr0' } +frameInfo: + isFrameAddressTaken: false + isReturnAddressTaken: false + hasStackMap: false + hasPatchPoint: false + stackSize: 0 + offsetAdjustment: 0 + maxAlignment: 4 + adjustsStack: false + hasCalls: false + maxCallFrameSize: 0 + hasOpaqueSPAdjustment: false + hasVAStart: false + hasMustTailInVarArgFunc: false +body: | + bb.0.entry: + liveins: %vgpr3, %vgpr10_vgpr11, %vgpr12_vgpr13 + + %vcc = V_CMP_NE_U32_e64 0, killed %vgpr3, implicit %exec + %sgpr4_sgpr5 = COPY %exec, implicit-def %exec + %sgpr6_sgpr7 = S_AND_B64 %sgpr4_sgpr5, killed %vcc, implicit-def dead %scc + %sgpr4_sgpr5 = S_XOR_B64 %sgpr6_sgpr7, killed %sgpr4_sgpr5, implicit-def dead %scc + %vgpr61_vgpr62 = COPY %vgpr10_vgpr11 + %vgpr155_vgpr156 = COPY %vgpr12_vgpr13 + %exec = S_MOV_B64_term killed %sgpr6_sgpr7 +... -- 2.40.0