From c4a91bfbcee65533bbbbb0afc46aa35b6f9364b7 Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Fri, 5 Jul 2019 23:33:43 +0000 Subject: [PATCH] RegUsageInfoCollector: Skip AMDGPU entry point functions I'm not sure if it's worth it or not to add a hook to disable the pass for an arbitrary function. This pass is taking up to 5% of compile time in tiny programs by iterating through all of the physical registers in every register class. This pass should be rewritten in terms of regunits. For now, skip doing anything for entry point functions. The vast majority of functions in the real world aren't callable, so just not running this will give the majority of the benefit. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@365255 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/CodeGen/RegUsageInfoCollector.cpp | 48 +++++++++++++++++++++++---- test/CodeGen/ARM/ipra-reg-usage.ll | 3 ++ test/CodeGen/X86/ipra-inline-asm.ll | 2 ++ test/CodeGen/X86/ipra-reg-usage.ll | 3 ++ 4 files changed, 50 insertions(+), 6 deletions(-) diff --git a/lib/CodeGen/RegUsageInfoCollector.cpp b/lib/CodeGen/RegUsageInfoCollector.cpp index 3031195807d..219c237e2e8 100644 --- a/lib/CodeGen/RegUsageInfoCollector.cpp +++ b/lib/CodeGen/RegUsageInfoCollector.cpp @@ -77,14 +77,45 @@ FunctionPass *llvm::createRegUsageInfoCollector() { return new RegUsageInfoCollector(); } +// TODO: Move to hook somewhere? + +// Return true if it is useful to track the used registers for IPRA / no CSR +// optimizations. This is not useful for entry points, and computing the +// register usage information is expensive. 
+static bool isCallableFunction(const MachineFunction &MF) { + switch (MF.getFunction().getCallingConv()) { + case CallingConv::AMDGPU_VS: + case CallingConv::AMDGPU_GS: + case CallingConv::AMDGPU_PS: + case CallingConv::AMDGPU_CS: + case CallingConv::AMDGPU_KERNEL: + return false; + default: + return true; + } +} + bool RegUsageInfoCollector::runOnMachineFunction(MachineFunction &MF) { MachineRegisterInfo *MRI = &MF.getRegInfo(); const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo(); const LLVMTargetMachine &TM = MF.getTarget(); LLVM_DEBUG(dbgs() << " -------------------- " << getPassName() - << " -------------------- \n"); - LLVM_DEBUG(dbgs() << "Function Name : " << MF.getName() << "\n"); + << " -------------------- \nFunction Name : " + << MF.getName() << '\n'); + + // Analyzing the register usage may be expensive on some targets. + if (!isCallableFunction(MF)) { + LLVM_DEBUG(dbgs() << "Not analyzing non-callable function\n"); + return false; + } + + // If there are no callers, there's no point in computing more precise + // register usage here. + if (MF.getFunction().use_empty()) { + LLVM_DEBUG(dbgs() << "Not analyzing function with no callers\n"); + return false; + } std::vector<uint32_t> RegMask; @@ -110,6 +141,7 @@ bool RegUsageInfoCollector::runOnMachineFunction(MachineFunction &MF) { }; // Scan all the physical registers. When a register is defined in the current // function set it and all the aliasing registers as defined in the regmask. + // FIXME: Rewrite to use regunits. for (unsigned PReg = 1, PRegE = TRI->getNumRegs(); PReg < PRegE; ++PReg) { // Don't count registers that are saved and restored. 
if (SavedRegs.test(PReg)) @@ -135,11 +167,14 @@ bool RegUsageInfoCollector::runOnMachineFunction(MachineFunction &MF) { << " function optimized for not having CSR.\n"); } - for (unsigned PReg = 1, PRegE = TRI->getNumRegs(); PReg < PRegE; ++PReg) - if (MachineOperand::clobbersPhysReg(&(RegMask[0]), PReg)) - LLVM_DEBUG(dbgs() << printReg(PReg, TRI) << " "); + LLVM_DEBUG( + for (unsigned PReg = 1, PRegE = TRI->getNumRegs(); PReg < PRegE; ++PReg) { + if (MachineOperand::clobbersPhysReg(&(RegMask[0]), PReg)) + dbgs() << printReg(PReg, TRI) << " "; + } - LLVM_DEBUG(dbgs() << " \n----------------------------------------\n"); + dbgs() << " \n----------------------------------------\n"; + ); PRUI.storeUpdateRegUsageInfo(F, RegMask); @@ -165,6 +200,7 @@ computeCalleeSavedRegs(BitVector &SavedRegs, MachineFunction &MF) { } // Insert any register fully saved via subregisters. + // FIXME: Rewrite to use regunits. for (const TargetRegisterClass *RC : TRI.regclasses()) { if (!RC->CoveredBySubRegs) continue; diff --git a/test/CodeGen/ARM/ipra-reg-usage.ll b/test/CodeGen/ARM/ipra-reg-usage.ll index a929d6e0930..007d176e521 100644 --- a/test/CodeGen/ARM/ipra-reg-usage.ll +++ b/test/CodeGen/ARM/ipra-reg-usage.ll @@ -11,5 +11,8 @@ define void @foo()#0 { call void @bar2() ret void } + +@llvm.used = appending global [1 x i8*] [i8* bitcast (void ()* @foo to i8*)] + declare void @bar2() attributes #0 = {nounwind} diff --git a/test/CodeGen/X86/ipra-inline-asm.ll b/test/CodeGen/X86/ipra-inline-asm.ll index 94d0e7842fd..c874a09980d 100644 --- a/test/CodeGen/X86/ipra-inline-asm.ll +++ b/test/CodeGen/X86/ipra-inline-asm.ll @@ -17,4 +17,6 @@ define void @foo() #0 { ret void } +@llvm.used = appending global [2 x i8*] [i8* bitcast (void ()* @foo to i8*), i8* bitcast (void ()* @bar to i8*)] + attributes #0 = { nounwind } diff --git a/test/CodeGen/X86/ipra-reg-usage.ll b/test/CodeGen/X86/ipra-reg-usage.ll index 2a557f2902a..3a877bfb345 100644 --- a/test/CodeGen/X86/ipra-reg-usage.ll +++ 
b/test/CodeGen/X86/ipra-reg-usage.ll @@ -9,4 +9,7 @@ define preserve_allcc void @foo()#0 { ret void } declare void @bar2() + +@llvm.used = appending global [1 x i8*] [i8* bitcast (void ()* @foo to i8*)] + attributes #0 = {nounwind} -- 2.40.0