From: Matt Arsenault Date: Mon, 8 Jul 2019 18:48:42 +0000 (+0000) Subject: RegUsageInfoCollector: Don't iterate all regs for every reg class X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=f0fdc19311d1c31e13958e8094db3f7983ef4f95;p=llvm RegUsageInfoCollector: Don't iterate all regs for every reg class This is extremely slow on AMDGPU, which has a lot of physical registers and a lot of register classes. determineCalleeSaves, via MachineRegisterInfo::isPhysRegUsed, already added all of the super registers to the saved set. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@365370 91177308-0d34-0410-b5e6-96231b3b80d8 --- diff --git a/lib/CodeGen/RegUsageInfoCollector.cpp b/lib/CodeGen/RegUsageInfoCollector.cpp index 219c237e2e8..ee1662a252c 100644 --- a/lib/CodeGen/RegUsageInfoCollector.cpp +++ b/lib/CodeGen/RegUsageInfoCollector.cpp @@ -189,42 +189,17 @@ computeCalleeSavedRegs(BitVector &SavedRegs, MachineFunction &MF) { // Target will return the set of registers that it saves/restores as needed. SavedRegs.clear(); TFI.determineCalleeSaves(MF, SavedRegs); + if (SavedRegs.none()) + return; // Insert subregs. const MCPhysReg *CSRegs = TRI.getCalleeSavedRegs(&MF); for (unsigned i = 0; CSRegs[i]; ++i) { - unsigned Reg = CSRegs[i]; - if (SavedRegs.test(Reg)) - for (MCSubRegIterator SR(Reg, &TRI, false); SR.isValid(); ++SR) + MCPhysReg Reg = CSRegs[i]; + if (SavedRegs.test(Reg)) { + // Save subregisters + for (MCSubRegIterator SR(Reg, &TRI); SR.isValid(); ++SR) SavedRegs.set(*SR); - } - - // Insert any register fully saved via subregisters. - // FIXME: Rewrite to use regunits. - for (const TargetRegisterClass *RC : TRI.regclasses()) { - if (!RC->CoveredBySubRegs) - continue; - - for (unsigned PReg = 1, PRegE = TRI.getNumRegs(); PReg < PRegE; ++PReg) { - if (SavedRegs.test(PReg)) - continue; - - // Check if PReg is fully covered by its subregs. - if (!RC->contains(PReg)) - continue; - - // Add PReg to SavedRegs if all subregs are saved. 
- bool AllSubRegsSaved = true; - bool HasAtLeastOneSubreg = false; - for (MCSubRegIterator SR(PReg, &TRI, false); SR.isValid(); ++SR) { - HasAtLeastOneSubreg = true; - if (!SavedRegs.test(*SR)) { - AllSubRegsSaved = false; - break; - } - } - if (AllSubRegsSaved && HasAtLeastOneSubreg) - SavedRegs.set(PReg); } } } diff --git a/test/CodeGen/AMDGPU/ipra-regmask.ll b/test/CodeGen/AMDGPU/ipra-regmask.ll new file mode 100644 index 00000000000..432470a5697 --- /dev/null +++ b/test/CodeGen/AMDGPU/ipra-regmask.ll @@ -0,0 +1,46 @@ +; RUN: llc -mtriple=amdgcn-amd-amdhsa -enable-ipra -print-regusage -o /dev/null 2>&1 < %s | FileCheck %s +; Make sure the expected regmask is generated for sub/superregisters. + +; CHECK-DAG: csr Clobbered Registers: $vgpr0 $vgpr0_vgpr1_vgpr2_vgpr3 $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 $vgpr0_vgpr1 $vgpr0_vgpr1_vgpr2 {{$}} +define void @csr() #0 { + call void asm sideeffect "", "~{v0},~{v36},~{v37}"() #0 + ret void +} + +; CHECK-DAG: subregs_for_super Clobbered Registers: $vgpr0 $vgpr1 $vgpr0_vgpr1_vgpr2_vgpr3 $vgpr1_vgpr2_vgpr3_vgpr4 $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 $vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 $vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8 $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 $vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16 $vgpr0_vgpr1 $vgpr1_vgpr2 $vgpr0_vgpr1_vgpr2 $vgpr1_vgpr2_vgpr3 {{$}} +define void @subregs_for_super() #0 { + call void asm sideeffect "", "~{v0},~{v1}"() #0 + ret void +} + +; CHECK-DAG: clobbered_reg_with_sub Clobbered Registers: $vgpr0 $vgpr1 $vgpr0_vgpr1_vgpr2_vgpr3 $vgpr1_vgpr2_vgpr3_vgpr4 $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 $vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 
$vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8 $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 $vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16 $vgpr0_vgpr1 $vgpr1_vgpr2 $vgpr0_vgpr1_vgpr2 $vgpr1_vgpr2_vgpr3 {{$}} +define void @clobbered_reg_with_sub() #0 { + call void asm sideeffect "", "~{v[0:1]}"() #0 + ret void +} + +; CHECK-DAG: nothing Clobbered Registers: {{$}} +define void @nothing() #0 { + ret void +} + +; CHECK-DAG: special_regs Clobbered Registers: $scc $m0 {{$}} +define void @special_regs() #0 { + call void asm sideeffect "", "~{m0},~{scc}"() #0 + ret void +} + +; CHECK-DAG: vcc Clobbered Registers: $vcc $vcc_hi $vcc_lo {{$}} +define void @vcc() #0 { + call void asm sideeffect "", "~{vcc}"() #0 + ret void +} + +@llvm.used = appending global [6 x i8*] [i8* bitcast (void ()* @csr to i8*), + i8* bitcast (void ()* @subregs_for_super to i8*), + i8* bitcast (void ()* @clobbered_reg_with_sub to i8*), + i8* bitcast (void ()* @nothing to i8*), + i8* bitcast (void ()* @special_regs to i8*), + i8* bitcast (void ()* @vcc to i8*)] + +attributes #0 = { nounwind }