From 0cc137e0510f1fc0608a5be00655dbcd0e46d2a6 Mon Sep 17 00:00:00 2001
From: Matthias Braun
Date: Tue, 20 Jun 2017 18:43:14 +0000
Subject: [PATCH] RegisterScavenging: Followup to r305625

This does some improvements/cleanup to the recently introduced
scavengeRegisterBackwards() functionality:

- Rewrite the findSurvivorBackwards algorithm to use the existing
  LiveRegUnits::accumulateBackward() code. This also avoids the Available
  and Candidates bitsets and needs just one LiveRegUnits instance
  (= one bitset).
- Pick registers in allocation order instead of register number order.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@305817 91177308-0d34-0410-b5e6-96231b3b80d8
---
 lib/CodeGen/RegisterScavenging.cpp            | 79 +++++++++----------
 test/CodeGen/AArch64/swiftself-scavenger.ll   |  2 +-
 .../CodeGen/AMDGPU/frame-index-elimination.ll | 34 ++++----
 test/CodeGen/AMDGPU/spill-m0.ll               |  8 +-
 test/CodeGen/PowerPC/2010-02-12-saveCR.ll     |  6 +-
 test/CodeGen/PowerPC/vsx-spill.ll             | 10 +--
 test/CodeGen/PowerPC/vsx.ll                   | 30 +++----
 7 files changed, 83 insertions(+), 86 deletions(-)

diff --git a/lib/CodeGen/RegisterScavenging.cpp b/lib/CodeGen/RegisterScavenging.cpp
index bffa94a80a7..05e641d9489 100644
--- a/lib/CodeGen/RegisterScavenging.cpp
+++ b/lib/CodeGen/RegisterScavenging.cpp
@@ -372,60 +372,62 @@ unsigned RegScavenger::findSurvivorReg(MachineBasicBlock::iterator StartMI,
 /// clobbered for the longest time.
 /// Returns the register and the earliest position we know it to be free or
 /// the position MBB.end() if no register is available.
-static std::pair<unsigned, MachineBasicBlock::iterator>
-findSurvivorBackwards(const TargetRegisterInfo &TRI,
+static std::pair<MCPhysReg, MachineBasicBlock::iterator>
+findSurvivorBackwards(const MachineRegisterInfo &MRI,
     MachineBasicBlock::iterator From, MachineBasicBlock::iterator To,
-    BitVector &Available, BitVector &Candidates) {
+    const LiveRegUnits &LiveOut, ArrayRef<MCPhysReg> AllocationOrder) {
   bool FoundTo = false;
-  unsigned Survivor = 0;
+  MCPhysReg Survivor = 0;
   MachineBasicBlock::iterator Pos;
   MachineBasicBlock &MBB = *From->getParent();
   unsigned InstrLimit = 25;
   unsigned InstrCountDown = InstrLimit;
+  const TargetRegisterInfo &TRI = *MRI.getTargetRegisterInfo();
+  LiveRegUnits Used(TRI);
+
   for (MachineBasicBlock::iterator I = From;; --I) {
     const MachineInstr &MI = *I;

-    // Remove any candidates touched by instruction.
-    bool FoundVReg = false;
-    for (const MachineOperand &MO : MI.operands()) {
-      if (MO.isRegMask()) {
-        Candidates.clearBitsNotInMask(MO.getRegMask());
-        continue;
-      }
-      if (!MO.isReg() || MO.isUndef() || MO.isDebug())
-        continue;
-      unsigned Reg = MO.getReg();
-      if (TargetRegisterInfo::isVirtualRegister(Reg)) {
-        FoundVReg = true;
-      } else if (TargetRegisterInfo::isPhysicalRegister(Reg)) {
-        for (MCRegAliasIterator AI(Reg, &TRI, true); AI.isValid(); ++AI)
-          Candidates.reset(*AI);
-      }
-    }
+    Used.accumulateBackward(MI);

     if (I == To) {
-      // If one of the available registers survived this long take it.
-      Available &= Candidates;
-      int Reg = Available.find_first();
-      if (Reg != -1)
-        return std::make_pair(Reg, MBB.end());
+      // See if one of the registers in RC wasn't used so far.
+      for (MCPhysReg Reg : AllocationOrder) {
+        if (!MRI.isReserved(Reg) && Used.available(Reg) &&
+            LiveOut.available(Reg))
+          return std::make_pair(Reg, MBB.end());
+      }
       // Otherwise we will continue up to InstrLimit instructions to find
       // the register which is not defined/used for the longest time.
       FoundTo = true;
       Pos = To;
     }
     if (FoundTo) {
-      if (Survivor == 0 || !Candidates.test(Survivor)) {
-        int Reg = Candidates.find_first();
-        if (Reg == -1)
+      if (Survivor == 0 || !Used.available(Survivor)) {
+        MCPhysReg AvailableReg = 0;
+        for (MCPhysReg Reg : AllocationOrder) {
+          if (!MRI.isReserved(Reg) && Used.available(Reg)) {
+            AvailableReg = Reg;
+            break;
+          }
+        }
+        if (AvailableReg == 0)
           break;
-        Survivor = Reg;
+        Survivor = AvailableReg;
       }
       if (--InstrCountDown == 0)
         break;
+
+      // Keep searching when we find a vreg since the spilled register will
+      // be useful for this other vreg as well later.
+      bool FoundVReg = false;
+      for (const MachineOperand &MO : MI.operands()) {
+        if (MO.isReg() && TargetRegisterInfo::isVirtualRegister(MO.getReg())) {
+          FoundVReg = true;
+          break;
+        }
+      }
       if (FoundVReg) {
-        // Keep searching when we find a vreg since the spilled register will
-        // be usefull for this other vreg as well later.
         InstrCountDown = InstrLimit;
         Pos = I;
       }
@@ -568,18 +570,13 @@ unsigned RegScavenger::scavengeRegisterBackwards(const TargetRegisterClass &RC,
                                                  bool RestoreAfter, int SPAdj) {
   const MachineBasicBlock &MBB = *To->getParent();
   const MachineFunction &MF = *MBB.getParent();
-  // Consider all allocatable registers in the register class initially
-  BitVector Candidates = TRI->getAllocatableSet(MF, &RC);
-
-  // Try to find a register that's unused if there is one, as then we won't
-  // have to spill.
-  BitVector Available = getRegsAvailable(&RC);

   // Find the register whose use is furthest away.
   MachineBasicBlock::iterator UseMI;
-  std::pair<unsigned, MachineBasicBlock::iterator> P =
-      findSurvivorBackwards(*TRI, MBBI, To, Available, Candidates);
-  unsigned Reg = P.first;
+  ArrayRef<MCPhysReg> AllocationOrder = RC.getRawAllocationOrder(MF);
+  std::pair<MCPhysReg, MachineBasicBlock::iterator> P =
+      findSurvivorBackwards(*MRI, MBBI, To, LiveUnits, AllocationOrder);
+  MCPhysReg Reg = P.first;
   MachineBasicBlock::iterator SpillBefore = P.second;
   assert(Reg != 0 && "No register left to scavenge!");
   // Found an available register?
diff --git a/test/CodeGen/AArch64/swiftself-scavenger.ll b/test/CodeGen/AArch64/swiftself-scavenger.ll
index 6d027844093..da069f472b4 100644
--- a/test/CodeGen/AArch64/swiftself-scavenger.ll
+++ b/test/CodeGen/AArch64/swiftself-scavenger.ll
@@ -5,7 +5,7 @@
 ; CHECK: str [[REG:x[0-9]+]], [sp, #8]
 ; CHECK: add [[REG]], sp, #248
 ; CHECK: str xzr, [{{\s*}}[[REG]], #32760]
-; CHECK: ldr x30, [sp, #8]
+; CHECK: ldr [[REG]], [sp, #8]
 target triple = "arm64-apple-ios"

 @ptr8 = external global i8*
diff --git a/test/CodeGen/AMDGPU/frame-index-elimination.ll b/test/CodeGen/AMDGPU/frame-index-elimination.ll
index df4fe09079e..b49d8e2d89d 100644
--- a/test/CodeGen/AMDGPU/frame-index-elimination.ll
+++ b/test/CodeGen/AMDGPU/frame-index-elimination.ll
@@ -6,9 +6,9 @@
 ; Materialize into a mov. Make sure there isn't an unnecessary copy.
 ; GCN-LABEL: {{^}}func_mov_fi_i32:
 ; GCN: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GCN: s_sub_u32 vcc_hi, s5, s4
-; GCN-NEXT: v_lshr_b32_e64 [[SCALED:v[0-9]+]], vcc_hi, 6
-; GCN-NEXT: v_add_i32_e32 v0, vcc, 4, [[SCALED]]
+; GCN: s_sub_u32 s6, s5, s4
+; GCN-NEXT: v_lshr_b32_e64 [[SCALED:v[0-9]+]], s6, 6
+; GCN-NEXT: v_add_i32_e64 v0, s[6:7], 4, [[SCALED]]
 ; GCN-NOT: v_mov
 ; GCN: ds_write_b32 v0, v0
 define void @func_mov_fi_i32() #0 {
@@ -22,9 +22,9 @@ define void @func_mov_fi_i32() #0 {

 ; GCN-LABEL: {{^}}func_add_constant_to_fi_i32:
 ; GCN: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GCN: s_sub_u32 vcc_hi, s5, s4
-; GCN-NEXT: v_lshr_b32_e64 [[SCALED:v[0-9]+]], vcc_hi, 6
-; GCN-NEXT: v_add_i32_e32 v0, vcc, 4, [[SCALED]]
+; GCN: s_sub_u32 s6, s5, s4
+; GCN-NEXT: v_lshr_b32_e64 [[SCALED:v[0-9]+]], s6, 6
+; GCN-NEXT: v_add_i32_e64 v0, s[6:7], 4, [[SCALED]]
 ; GCN-NEXT: v_add_i32_e32 v0, vcc, 4, v0
 ; GCN-NOT: v_mov
 ; GCN: ds_write_b32 v0, v0
@@ -39,9 +39,9 @@ define void @func_add_constant_to_fi_i32() #0 {
 ; into.

 ; GCN-LABEL: {{^}}func_other_fi_user_i32:
-; GCN: s_sub_u32 vcc_hi, s5, s4
-; GCN-NEXT: v_lshr_b32_e64 [[SCALED:v[0-9]+]], vcc_hi, 6
-; GCN-NEXT: v_add_i32_e32 v0, vcc, 4, [[SCALED]]
+; GCN: s_sub_u32 s6, s5, s4
+; GCN-NEXT: v_lshr_b32_e64 [[SCALED:v[0-9]+]], s6, 6
+; GCN-NEXT: v_add_i32_e64 v0, s[6:7], 4, [[SCALED]]
 ; GCN-NEXT: v_mul_lo_i32 v0, v0, 9
 ; GCN-NOT: v_mov
 ; GCN: ds_write_b32 v0, v0
@@ -71,8 +71,8 @@ define void @func_load_private_arg_i32_ptr(i32* %ptr) #0 {

 ; GCN-LABEL: {{^}}void_func_byval_struct_i8_i32_ptr:
 ; GCN: s_waitcnt
-; GCN-NEXT: s_sub_u32 vcc_hi, s5, s4
-; GCN-NEXT: v_lshr_b32_e64 v0, vcc_hi, 6
+; GCN-NEXT: s_sub_u32 s6, s5, s4
+; GCN-NEXT: v_lshr_b32_e64 v0, s6, 6
 ; GCN-NEXT: v_add_i32_e32 v0, vcc, 4, v0
 ; GCN-NOT: v_mov
 ; GCN: ds_write_b32 v0, v0
@@ -99,8 +99,8 @@ define void @void_func_byval_struct_i8_i32_ptr_value({ i8, i32 }* byval %arg0) #
 }

 ; GCN-LABEL: {{^}}void_func_byval_struct_i8_i32_ptr_nonentry_block:
-; GCN: s_sub_u32 vcc_hi, s5, s4
-; GCN: v_lshr_b32_e64 v1, vcc_hi, 6
+; GCN: s_sub_u32 s6, s5, s4
+; GCN: v_lshr_b32_e64 v1, s6, 6
 ; GCN: s_and_saveexec_b64
 ; GCN: v_add_i32_e32 v0, vcc, 4, v1

@@ -123,10 +123,10 @@ ret:

 ; Added offset can't be used with VOP3 add
 ; GCN-LABEL: {{^}}func_other_fi_user_non_inline_imm_offset_i32:
-; GCN: s_sub_u32 vcc_hi, s5, s4
-; GCN-DAG: v_lshr_b32_e64 [[SCALED:v[0-9]+]], vcc_hi, 6
-; GCN-DAG: s_movk_i32 vcc_hi, 0x204
-; GCN: v_add_i32_e32 v0, vcc, vcc_hi, [[SCALED]]
+; GCN: s_sub_u32 s6, s5, s4
+; GCN-DAG: v_lshr_b32_e64 [[SCALED:v[0-9]+]], s6, 6
+; GCN-DAG: s_movk_i32 s6, 0x204
+; GCN: v_add_i32_e64 v0, s[6:7], s6, [[SCALED]]
 ; GCN: v_mul_lo_i32 v0, v0, 9
 ; GCN: ds_write_b32 v0, v0
 define void @func_other_fi_user_non_inline_imm_offset_i32() #0 {
diff --git a/test/CodeGen/AMDGPU/spill-m0.ll b/test/CodeGen/AMDGPU/spill-m0.ll
index 7e8fa118c2c..1147464c1dd 100644
--- a/test/CodeGen/AMDGPU/spill-m0.ll
+++ b/test/CodeGen/AMDGPU/spill-m0.ll
@@ -119,10 +119,10 @@ endif:                                            ; preds = %else, %if

 ; GCN: ; clobber m0

-; TOSMEM: s_mov_b32 vcc_hi, m0
+; TOSMEM: s_mov_b32 s2, m0
 ; TOSMEM: s_add_u32 m0, s3, 0x100
 ; TOSMEM-NEXT: s_buffer_store_dwordx2 s{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, m0 ; 8-byte Folded Spill
-; TOSMEM: s_mov_b32 m0, vcc_hi
+; TOSMEM: s_mov_b32 m0, s2

 ; TOSMEM: s_mov_b64 exec,
 ; TOSMEM: s_cbranch_execz
@@ -170,10 +170,10 @@ endif:

 ; TOSMEM: s_mov_b32 m0, -1

-; TOSMEM: s_mov_b32 vcc_hi, m0
+; TOSMEM: s_mov_b32 s0, m0
 ; TOSMEM: s_add_u32 m0, s3, 0x100
 ; TOSMEM: s_buffer_load_dwordx2 s{{\[[0-9]+:[0-9]+\]}}, s[88:91], m0 ; 8-byte Folded Reload
-; TOSMEM: s_mov_b32 m0, vcc_hi
+; TOSMEM: s_mov_b32 m0, s0

 ; TOSMEM: s_waitcnt lgkmcnt(0)
 ; TOSMEM: ds_write_b64
diff --git a/test/CodeGen/PowerPC/2010-02-12-saveCR.ll b/test/CodeGen/PowerPC/2010-02-12-saveCR.ll
index 5f1555e77af..9540249eaed 100644
--- a/test/CodeGen/PowerPC/2010-02-12-saveCR.ll
+++ b/test/CodeGen/PowerPC/2010-02-12-saveCR.ll
@@ -8,15 +8,15 @@ entry:
 ; Note that part of what is being checked here is proper register reuse.
 ; CHECK: mfcr [[T1:r[0-9]+]]                         ; cr2
 ; CHECK: lis [[T2:r[0-9]+]], 1
-; CHECK: addi r3, r1, 72
 ; CHECK: rotlwi [[T1]], [[T1]], 8
 ; CHECK: ori [[T2]], [[T2]], 34540
 ; CHECK: stwx [[T1]], r1, [[T2]]
-; CHECK: lis [[T3:r[0-9]+]], 1
 ; CHECK: mfcr [[T4:r[0-9]+]]                         ; cr3
-; CHECK: ori [[T3]], [[T3]], 34536
+; CHECK: lis [[T3:r[0-9]+]], 1
 ; CHECK: rotlwi [[T4]], [[T4]], 12
+; CHECK: ori [[T3]], [[T3]], 34536
 ; CHECK: stwx [[T4]], r1, [[T3]]
+; CHECK: addi r3, r1, 72
 %x = alloca [100000 x i8]                       ; <[100000 x i8]*> [#uses=1]
 %"alloca point" = bitcast i32 0 to i32          ; <i32> [#uses=0]
 %x1 = bitcast [100000 x i8]* %x to i8*          ; <i8*> [#uses=1]
diff --git a/test/CodeGen/PowerPC/vsx-spill.ll b/test/CodeGen/PowerPC/vsx-spill.ll
index 4dec0daecd9..93ad9855626 100644
--- a/test/CodeGen/PowerPC/vsx-spill.ll
+++ b/test/CodeGen/PowerPC/vsx-spill.ll
@@ -23,9 +23,9 @@ entry:
 ; CHECK-REG: blr

 ; CHECK-FISL: @foo1
-; CHECK-FISL: lis 0, -1
-; CHECK-FISL: ori 0, 0, 65384
-; CHECK-FISL: stxsdx 1, 1, 0
+; CHECK-FISL: lis 3, -1
+; CHECK-FISL: ori 3, 3, 65384
+; CHECK-FISL: stxsdx 1, 1, 3
 ; CHECK-FISL: blr

 ; CHECK-P9-REG: @foo1
@@ -54,8 +54,8 @@ entry:

 ; CHECK-FISL: @foo2
 ; CHECK-FISL: xsadddp [[R1:[0-9]+]], 1, 1
-; CHECK-FISL: stxsdx [[R1]], [[R1]], 0
-; CHECK-FISL: lxsdx [[R1]], [[R1]], 0
+; CHECK-FISL: stxsdx [[R1]], [[R1]], 3
+; CHECK-FISL: lxsdx [[R1]], [[R1]], 3
 ; CHECK-FISL: blr

 ; CHECK-P9-REG: @foo2
diff --git a/test/CodeGen/PowerPC/vsx.ll b/test/CodeGen/PowerPC/vsx.ll
index cfea3e5696d..26b59926c7f 100644
--- a/test/CodeGen/PowerPC/vsx.ll
+++ b/test/CodeGen/PowerPC/vsx.ll
@@ -235,9 +235,9 @@ entry:
 ; CHECK-FISL-LABEL: @test14
 ; CHECK-FISL: xxlor 0, 34, 35
 ; CHECK-FISL: xxlnor 34, 34, 35
-; CHECK-FISL: lis 0, -1
-; CHECK-FISL: ori 0, 0, 65520
-; CHECK-FISL: stxvd2x 0, 1, 0
+; CHECK-FISL: lis 3, -1
+; CHECK-FISL: ori 3, 3, 65520
+; CHECK-FISL: stxvd2x 0, 1, 3
 ; CHECK-FISL: blr

 ; CHECK-LE-LABEL: @test14
@@ -260,9 +260,9 @@ entry:
 ; CHECK-FISL: xxlor 36, 0, 0
 ; CHECK-FISL: xxlnor 0, 34, 35
 ; CHECK-FISL: xxlor 34, 0, 0
-; CHECK-FISL: lis 0, -1
-; CHECK-FISL: ori 0, 0, 65520
-; CHECK-FISL: stxvd2x 36, 1, 0
+; CHECK-FISL: lis 3, -1
+; CHECK-FISL: ori 3, 3, 65520
+; CHECK-FISL: stxvd2x 36, 1, 3
 ; CHECK-FISL: blr

 ; CHECK-LE-LABEL: @test15
@@ -285,9 +285,9 @@ entry:
 ; CHECK-FISL: xxlor 36, 0, 0
 ; CHECK-FISL: xxlnor 0, 34, 35
 ; CHECK-FISL: xxlor 34, 0, 0
-; CHECK-FISL: lis 0, -1
-; CHECK-FISL: ori 0, 0, 65520
-; CHECK-FISL: stxvd2x 36, 1, 0
+; CHECK-FISL: lis 3, -1
+; CHECK-FISL: ori 3, 3, 65520
+; CHECK-FISL: stxvd2x 36, 1, 3
 ; CHECK-FISL: blr

 ; CHECK-LE-LABEL: @test16
@@ -330,9 +330,9 @@ entry:
 ; CHECK-FISL: xxlor 36, 0, 0
 ; CHECK-FISL: xxlandc 0, 34, 35
 ; CHECK-FISL: xxlor 34, 0, 0
-; CHECK-FISL: lis 0, -1
-; CHECK-FISL: ori 0, 0, 65520
-; CHECK-FISL: stxvd2x 36, 1, 0
+; CHECK-FISL: lis 3, -1
+; CHECK-FISL: ori 3, 3, 65520
+; CHECK-FISL: stxvd2x 36, 1, 3
 ; CHECK-FISL: blr

 ; CHECK-LE-LABEL: @test18
@@ -355,9 +355,9 @@ entry:
 ; CHECK-FISL: xxlor 36, 0, 0
 ; CHECK-FISL: xxlandc 0, 34, 35
 ; CHECK-FISL: xxlor 34, 0, 0
-; CHECK-FISL: lis 0, -1
-; CHECK-FISL: ori 0, 0, 65520
-; CHECK-FISL: stxvd2x 36, 1, 0
+; CHECK-FISL: lis 3, -1
+; CHECK-FISL: ori 3, 3, 65520
+; CHECK-FISL: stxvd2x 36, 1, 3
 ; CHECK-FISL: blr

 ; CHECK-LE-LABEL: @test19
--
2.40.0
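
As an illustration of the backward survivor search described in the commit
message (accumulate the registers touched while walking backwards from the
end of the range, then take the first register in allocation order that is
neither touched nor live-out), here is a minimal, self-contained C++ sketch.
It is illustrative only: the register model, the Instr type, and the
findSurvivor helper are hypothetical simplifications for this sketch, not
LLVM's actual API.

#include <bitset>
#include <cstddef>
#include <iostream>
#include <vector>

// Hypothetical, simplified model: registers are small integers and an
// "instruction" just lists the physical registers it reads or writes.
constexpr std::size_t NumRegs = 8;
using RegSet = std::bitset<NumRegs>;

struct Instr {
  std::vector<unsigned> Regs; // registers used or defined by this instruction
};

// Walk backwards from the end of the block down to index To, accumulating
// every register touched on the way (this plays the role of
// LiveRegUnits::accumulateBackward()).  Then return the first register in
// AllocationOrder that is neither touched in that range nor live out of the
// block.  Returns NumRegs if nothing survives and a spill would be required.
unsigned findSurvivor(const std::vector<Instr> &Block, std::size_t To,
                      const RegSet &LiveOut,
                      const std::vector<unsigned> &AllocationOrder) {
  RegSet Used;
  for (std::size_t I = Block.size(); I-- > To;)
    for (unsigned R : Block[I].Regs)
      Used.set(R);

  for (unsigned R : AllocationOrder) // allocation order, not numeric order
    if (!Used.test(R) && !LiveOut.test(R))
      return R;
  return NumRegs;
}

int main() {
  // Block of three instructions; a free register is needed at index 1.
  std::vector<Instr> Block = {{{0}}, {{1, 2}}, {{2, 3}}};
  RegSet LiveOut;
  LiveOut.set(4); // r4 is live out of the block, so it is not a candidate
  std::vector<unsigned> AllocationOrder = {4, 5, 6, 7, 0, 1, 2, 3};

  unsigned R = findSurvivor(Block, /*To=*/1, LiveOut, AllocationOrder);
  std::cout << "survivor register: r" << R << "\n"; // expect r5
  return 0;
}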