From fdc698b726543ec9a3066a8f98d95161c4b4d9f1 Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Mon, 21 Oct 2019 19:53:46 +0000 Subject: [PATCH] AMDGPU: Erase redundant redefs of m0 in SIFoldOperands Only handle simple intra-block redefs of m0 to the same value. This avoids interference from redefs of m0 in SILoadStoreOptimizer. I was initially teaching that pass to ignore redefs of m0, but having them not exist beforehand is much simpler. This is in preparation for deleting the current special m0 handling in SIFixSGPRCopies to allow the register coalescer to handle the difficult cases. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@375449 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/AMDGPU/SIFoldOperands.cpp | 21 + .../AMDGPU/fold-operands-remove-m0-redef.mir | 366 ++++++++++++++++++ 2 files changed, 387 insertions(+) create mode 100644 test/CodeGen/AMDGPU/fold-operands-remove-m0-redef.mir diff --git a/lib/Target/AMDGPU/SIFoldOperands.cpp b/lib/Target/AMDGPU/SIFoldOperands.cpp index bdbcc658b88..4eac0316876 100644 --- a/lib/Target/AMDGPU/SIFoldOperands.cpp +++ b/lib/Target/AMDGPU/SIFoldOperands.cpp @@ -1349,6 +1349,8 @@ bool SIFoldOperands::runOnMachineFunction(MachineFunction &MF) { for (MachineBasicBlock *MBB : depth_first(&MF)) { MachineBasicBlock::iterator I, Next; + + MachineOperand *CurrentKnownM0Val = nullptr; for (I = MBB->begin(); I != MBB->end(); I = Next) { Next = std::next(I); MachineInstr &MI = *I; @@ -1361,6 +1363,25 @@ bool SIFoldOperands::runOnMachineFunction(MachineFunction &MF) { if (IsIEEEMode || (!HasNSZ && !MI.getFlag(MachineInstr::FmNsz)) || !tryFoldOMod(MI)) tryFoldClamp(MI); + + // Saw an unknown clobber of m0, so we no longer know what it is. + if (CurrentKnownM0Val && MI.modifiesRegister(AMDGPU::M0, TRI)) + CurrentKnownM0Val = nullptr; + continue; + } + + // Specially track simple redefs of m0 to the same value in a block, so we + // can erase the later ones. 
+ if (MI.getOperand(0).getReg() == AMDGPU::M0) { + MachineOperand &NewM0Val = MI.getOperand(1); + if (CurrentKnownM0Val && CurrentKnownM0Val->isIdenticalTo(NewM0Val)) { + MI.eraseFromParent(); + continue; + } + + // We aren't tracking other physical registers + CurrentKnownM0Val = (NewM0Val.isReg() && NewM0Val.getReg().isPhysical()) ? + nullptr : &NewM0Val; continue; } diff --git a/test/CodeGen/AMDGPU/fold-operands-remove-m0-redef.mir b/test/CodeGen/AMDGPU/fold-operands-remove-m0-redef.mir new file mode 100644 index 00000000000..10b49e68483 --- /dev/null +++ b/test/CodeGen/AMDGPU/fold-operands-remove-m0-redef.mir @@ -0,0 +1,366 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +# RUN: llc -march=amdgcn -mcpu=fiji -verify-machineinstrs -run-pass=si-fold-operands %s -o - | FileCheck -check-prefix=GCN %s + +--- | + define amdgpu_kernel void @redef_m0_same_copy() { ret void } + define amdgpu_kernel void @multi_redef_m0_same_copy() { ret void } + define amdgpu_kernel void @redef_m0_different_copy() { ret void } + define amdgpu_kernel void @redef_m0_mixed_copy0() { ret void } + define amdgpu_kernel void @redef_m0_mixed_copy1() { ret void } + define amdgpu_kernel void @redef_m0_same_mov_imm() { ret void } + define amdgpu_kernel void @redef_m0_different_inst0() { ret void } + define amdgpu_kernel void @redef_m0_different_inst1() { ret void } + define amdgpu_kernel void @redef_m0_mixed_read_m0() { ret void } + define amdgpu_kernel void @redef_m0_same_copy_call() { ret void } + define amdgpu_kernel void @redef_m0_same_copy_multi_block() { ret void } + define amdgpu_kernel void @redef_m0_copy_self() { ret void } + define amdgpu_kernel void @redef_m0_copy_physreg() { ret void } + + declare void @func() +... 
+ +--- +name: redef_m0_same_copy +tracksRegLiveness: true +machineFunctionInfo: + isEntryFunction: true +body: | + bb.0: + liveins: $vgpr0, $sgpr0 + + ; GCN-LABEL: name: redef_m0_same_copy + ; GCN: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GCN: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr0 + ; GCN: $m0 = COPY [[COPY1]] + ; GCN: [[DS_READ_B32_:%[0-9]+]]:vgpr_32 = DS_READ_B32 [[COPY]], 0, 0, implicit $m0, implicit $exec :: (load 4) + ; GCN: [[DS_READ_B32_1:%[0-9]+]]:vgpr_32 = DS_READ_B32 [[COPY]], 64, 0, implicit $m0, implicit $exec :: (load 4) + %0:vgpr_32 = COPY $vgpr0 + %1:sgpr_32 = COPY $sgpr0 + $m0 = COPY %1 + %2:vgpr_32 = DS_READ_B32 %0, 0, 0, implicit $m0, implicit $exec :: (load 4) + $m0 = COPY %1 + %3:vgpr_32 = DS_READ_B32 %0, 64, 0, implicit $m0, implicit $exec :: (load 4) + +... + +--- +name: multi_redef_m0_same_copy +tracksRegLiveness: true +machineFunctionInfo: + isEntryFunction: true +body: | + bb.0: + liveins: $vgpr0, $sgpr0 + + ; GCN-LABEL: name: multi_redef_m0_same_copy + ; GCN: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GCN: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr0 + ; GCN: $m0 = COPY [[COPY1]] + ; GCN: [[DS_READ_B32_:%[0-9]+]]:vgpr_32 = DS_READ_B32 [[COPY]], 0, 0, implicit $m0, implicit $exec :: (load 4) + ; GCN: [[DS_READ_B32_1:%[0-9]+]]:vgpr_32 = DS_READ_B32 [[COPY]], 64, 0, implicit $m0, implicit $exec :: (load 4) + %0:vgpr_32 = COPY $vgpr0 + %1:sgpr_32 = COPY $sgpr0 + $m0 = COPY %1 + %2:vgpr_32 = DS_READ_B32 %0, 0, 0, implicit $m0, implicit $exec :: (load 4) + $m0 = COPY %1 + $m0 = COPY %1 + %3:vgpr_32 = DS_READ_B32 %0, 64, 0, implicit $m0, implicit $exec :: (load 4) + +... 
+ +--- +name: redef_m0_different_copy +tracksRegLiveness: true +machineFunctionInfo: + isEntryFunction: true +body: | + bb.0: + liveins: $vgpr0, $sgpr0, $sgpr1 + + ; GCN-LABEL: name: redef_m0_different_copy + ; GCN: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GCN: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr0 + ; GCN: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr1 + ; GCN: $m0 = COPY [[COPY1]] + ; GCN: [[DS_READ_B32_:%[0-9]+]]:vgpr_32 = DS_READ_B32 [[COPY]], 0, 0, implicit $m0, implicit $exec :: (load 4) + ; GCN: $m0 = COPY [[COPY2]] + ; GCN: [[DS_READ_B32_1:%[0-9]+]]:vgpr_32 = DS_READ_B32 [[COPY]], 64, 0, implicit $m0, implicit $exec :: (load 4) + %0:vgpr_32 = COPY $vgpr0 + %1:sgpr_32 = COPY $sgpr0 + %2:sgpr_32 = COPY $sgpr1 + $m0 = COPY %1 + %3:vgpr_32 = DS_READ_B32 %0, 0, 0, implicit $m0, implicit $exec :: (load 4) + $m0 = COPY %2 + %4:vgpr_32 = DS_READ_B32 %0, 64, 0, implicit $m0, implicit $exec :: (load 4) + +... + +--- +name: redef_m0_mixed_copy0 +tracksRegLiveness: true +machineFunctionInfo: + isEntryFunction: true +body: | + bb.0: + liveins: $vgpr0, $sgpr0, $sgpr1 + + ; GCN-LABEL: name: redef_m0_mixed_copy0 + ; GCN: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GCN: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr0 + ; GCN: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr1 + ; GCN: $m0 = COPY [[COPY1]] + ; GCN: [[DS_READ_B32_:%[0-9]+]]:vgpr_32 = DS_READ_B32 [[COPY]], 0, 0, implicit $m0, implicit $exec :: (load 4) + ; GCN: $m0 = COPY [[COPY2]] + ; GCN: [[DS_READ_B32_1:%[0-9]+]]:vgpr_32 = DS_READ_B32 [[COPY]], 64, 0, implicit $m0, implicit $exec :: (load 4) + %0:vgpr_32 = COPY $vgpr0 + %1:sgpr_32 = COPY $sgpr0 + %2:sgpr_32 = COPY $sgpr1 + $m0 = COPY %1 + %3:vgpr_32 = DS_READ_B32 %0, 0, 0, implicit $m0, implicit $exec :: (load 4) + $m0 = COPY %1 + $m0 = COPY %2 + %4:vgpr_32 = DS_READ_B32 %0, 64, 0, implicit $m0, implicit $exec :: (load 4) + +... 
+ +--- +name: redef_m0_mixed_copy1 +tracksRegLiveness: true + +machineFunctionInfo: + isEntryFunction: true +body: | + bb.0: + liveins: $vgpr0, $sgpr0, $sgpr1 + + ; GCN-LABEL: name: redef_m0_mixed_copy1 + ; GCN: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GCN: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr0 + ; GCN: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr1 + ; GCN: $m0 = COPY [[COPY1]] + ; GCN: [[DS_READ_B32_:%[0-9]+]]:vgpr_32 = DS_READ_B32 [[COPY]], 0, 0, implicit $m0, implicit $exec :: (load 4) + ; GCN: $m0 = COPY [[COPY2]] + ; GCN: $m0 = COPY [[COPY1]] + ; GCN: [[DS_READ_B32_1:%[0-9]+]]:vgpr_32 = DS_READ_B32 [[COPY]], 64, 0, implicit $m0, implicit $exec :: (load 4) + %0:vgpr_32 = COPY $vgpr0 + %1:sgpr_32 = COPY $sgpr0 + %2:sgpr_32 = COPY $sgpr1 + $m0 = COPY %1 + %3:vgpr_32 = DS_READ_B32 %0, 0, 0, implicit $m0, implicit $exec :: (load 4) + $m0 = COPY %2 + $m0 = COPY %1 + %4:vgpr_32 = DS_READ_B32 %0, 64, 0, implicit $m0, implicit $exec :: (load 4) + +... + +--- +name: redef_m0_same_mov_imm +tracksRegLiveness: true +machineFunctionInfo: + isEntryFunction: true +body: | + bb.0: + liveins: $vgpr0, $sgpr0 + + ; GCN-LABEL: name: redef_m0_same_mov_imm + ; GCN: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GCN: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr0 + ; GCN: $m0 = S_MOV_B32 -1 + ; GCN: [[DS_READ_B32_:%[0-9]+]]:vgpr_32 = DS_READ_B32 [[COPY]], 0, 0, implicit $m0, implicit $exec :: (load 4) + ; GCN: [[DS_READ_B32_1:%[0-9]+]]:vgpr_32 = DS_READ_B32 [[COPY]], 64, 0, implicit $m0, implicit $exec :: (load 4) + %0:vgpr_32 = COPY $vgpr0 + %1:sgpr_32 = COPY $sgpr0 + $m0 = S_MOV_B32 -1 + %2:vgpr_32 = DS_READ_B32 %0, 0, 0, implicit $m0, implicit $exec :: (load 4) + $m0 = S_MOV_B32 -1 + %3:vgpr_32 = DS_READ_B32 %0, 64, 0, implicit $m0, implicit $exec :: (load 4) + +... 
+ +--- +name: redef_m0_different_inst0 +tracksRegLiveness: true +machineFunctionInfo: + isEntryFunction: true +body: | + bb.0: + liveins: $vgpr0, $sgpr0 + + ; GCN-LABEL: name: redef_m0_different_inst0 + ; GCN: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GCN: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr0 + ; GCN: $m0 = COPY [[COPY1]] + ; GCN: [[DS_READ_B32_:%[0-9]+]]:vgpr_32 = DS_READ_B32 [[COPY]], 0, 0, implicit $m0, implicit $exec :: (load 4) + ; GCN: $m0 = IMPLICIT_DEF + ; GCN: [[DS_READ_B32_1:%[0-9]+]]:vgpr_32 = DS_READ_B32 [[COPY]], 64, 0, implicit $m0, implicit $exec :: (load 4) + %0:vgpr_32 = COPY $vgpr0 + %1:sgpr_32 = COPY $sgpr0 + $m0 = COPY %1 + %2:vgpr_32 = DS_READ_B32 %0, 0, 0, implicit $m0, implicit $exec :: (load 4) + $m0 = IMPLICIT_DEF + %3:vgpr_32 = DS_READ_B32 %0, 64, 0, implicit $m0, implicit $exec :: (load 4) + +... + +--- +name: redef_m0_different_inst1 +tracksRegLiveness: true +machineFunctionInfo: + isEntryFunction: true +body: | + bb.0: + liveins: $vgpr0, $sgpr0 + + ; GCN-LABEL: name: redef_m0_different_inst1 + ; GCN: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GCN: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr0 + ; GCN: $m0 = COPY [[COPY1]] + ; GCN: [[DS_READ_B32_:%[0-9]+]]:vgpr_32 = DS_READ_B32 [[COPY]], 0, 0, implicit $m0, implicit $exec :: (load 4) + ; GCN: S_NOP 0, implicit-def $m0 + ; GCN: [[DS_READ_B32_1:%[0-9]+]]:vgpr_32 = DS_READ_B32 [[COPY]], 64, 0, implicit $m0, implicit $exec :: (load 4) + %0:vgpr_32 = COPY $vgpr0 + %1:sgpr_32 = COPY $sgpr0 + $m0 = COPY %1 + %2:vgpr_32 = DS_READ_B32 %0, 0, 0, implicit $m0, implicit $exec :: (load 4) + S_NOP 0, implicit-def $m0 + %3:vgpr_32 = DS_READ_B32 %0, 64, 0, implicit $m0, implicit $exec :: (load 4) + +... 
+ +--- +name: redef_m0_mixed_read_m0 +tracksRegLiveness: true +machineFunctionInfo: + isEntryFunction: true +body: | + bb.0: + liveins: $vgpr0, $sgpr0, $sgpr1 + + ; GCN-LABEL: name: redef_m0_mixed_read_m0 + ; GCN: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GCN: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr0 + ; GCN: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr1 + ; GCN: $m0 = COPY [[COPY1]] + ; GCN: [[DS_READ_B32_:%[0-9]+]]:vgpr_32 = DS_READ_B32 [[COPY]], 0, 0, implicit $m0, implicit $exec :: (load 4) + ; GCN: $m0 = COPY [[COPY2]] + ; GCN: [[DS_READ_B32_1:%[0-9]+]]:vgpr_32 = DS_READ_B32 [[COPY]], 64, 0, implicit $m0, implicit $exec :: (load 4) + ; GCN: [[DS_READ_B32_2:%[0-9]+]]:vgpr_32 = DS_READ_B32 [[COPY]], 128, 0, implicit $m0, implicit $exec :: (load 4) + %0:vgpr_32 = COPY $vgpr0 + %1:sgpr_32 = COPY $sgpr0 + %2:sgpr_32 = COPY $sgpr1 + $m0 = COPY %1 + %3:vgpr_32 = DS_READ_B32 %0, 0, 0, implicit $m0, implicit $exec :: (load 4) + $m0 = COPY %2 + %4:vgpr_32 = DS_READ_B32 %0, 64, 0, implicit $m0, implicit $exec :: (load 4) + $m0 = COPY %2 + %5:vgpr_32 = DS_READ_B32 %0, 128, 0, implicit $m0, implicit $exec :: (load 4) +... 
+ +--- +name: redef_m0_same_copy_call +tracksRegLiveness: true +machineFunctionInfo: + isEntryFunction: true +body: | + bb.0: + liveins: $vgpr0, $sgpr0 + + ; GCN-LABEL: name: redef_m0_same_copy_call + ; GCN: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GCN: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr0 + ; GCN: $m0 = COPY [[COPY1]] + ; GCN: [[DS_READ_B32_:%[0-9]+]]:vgpr_32 = DS_READ_B32 [[COPY]], 0, 0, implicit $m0, implicit $exec :: (load 4) + ; GCN: dead $sgpr30_sgpr31 = SI_CALL undef $sgpr6_sgpr7, @func, csr_amdgpu_highregs + ; GCN: $m0 = COPY [[COPY1]] + ; GCN: [[DS_READ_B32_1:%[0-9]+]]:vgpr_32 = DS_READ_B32 [[COPY]], 64, 0, implicit $m0, implicit $exec :: (load 4) + %0:vgpr_32 = COPY $vgpr0 + %1:sgpr_32 = COPY $sgpr0 + $m0 = COPY %1 + %2:vgpr_32 = DS_READ_B32 %0, 0, 0, implicit $m0, implicit $exec :: (load 4) + dead $sgpr30_sgpr31 = SI_CALL undef $sgpr6_sgpr7, @func, csr_amdgpu_highregs + $m0 = COPY %1 + %3:vgpr_32 = DS_READ_B32 %0, 64, 0, implicit $m0, implicit $exec :: (load 4) + +... + +--- +name: redef_m0_same_copy_multi_block +tracksRegLiveness: true +machineFunctionInfo: + isEntryFunction: true +body: | + ; GCN-LABEL: name: redef_m0_same_copy_multi_block + ; GCN: bb.0: + ; GCN: successors: %bb.1(0x80000000) + ; GCN: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GCN: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr0 + ; GCN: $m0 = COPY [[COPY1]] + ; GCN: [[DS_READ_B32_:%[0-9]+]]:vgpr_32 = DS_READ_B32 [[COPY]], 0, 0, implicit $m0, implicit $exec :: (load 4) + ; GCN: bb.1: + ; GCN: $m0 = COPY [[COPY1]] + ; GCN: [[DS_READ_B32_1:%[0-9]+]]:vgpr_32 = DS_READ_B32 [[COPY]], 64, 0, implicit $m0, implicit $exec :: (load 4) + bb.0: + liveins: $vgpr0, $sgpr0 + + %0:vgpr_32 = COPY $vgpr0 + %1:sgpr_32 = COPY $sgpr0 + $m0 = COPY %1 + %2:vgpr_32 = DS_READ_B32 %0, 0, 0, implicit $m0, implicit $exec :: (load 4) + + bb.1: + $m0 = COPY %1 + %3:vgpr_32 = DS_READ_B32 %0, 64, 0, implicit $m0, implicit $exec :: (load 4) + +... 
+ +--- +name: redef_m0_copy_self +tracksRegLiveness: true +machineFunctionInfo: + isEntryFunction: true +body: | + bb.0: + liveins: $vgpr0, $sgpr0 + + ; GCN-LABEL: name: redef_m0_copy_self + ; GCN: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GCN: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr0 + ; GCN: $m0 = COPY [[COPY1]] + ; GCN: [[DS_READ_B32_:%[0-9]+]]:vgpr_32 = DS_READ_B32 [[COPY]], 0, 0, implicit $m0, implicit $exec :: (load 4) + ; GCN: $m0 = COPY $m0 + ; GCN: [[DS_READ_B32_1:%[0-9]+]]:vgpr_32 = DS_READ_B32 [[COPY]], 64, 0, implicit $m0, implicit $exec :: (load 4) + %0:vgpr_32 = COPY $vgpr0 + %1:sgpr_32 = COPY $sgpr0 + $m0 = COPY %1 + %2:vgpr_32 = DS_READ_B32 %0, 0, 0, implicit $m0, implicit $exec :: (load 4) + $m0 = COPY $m0 + %3:vgpr_32 = DS_READ_B32 %0, 64, 0, implicit $m0, implicit $exec :: (load 4) + +... + +--- +name: redef_m0_copy_physreg +tracksRegLiveness: true +machineFunctionInfo: + isEntryFunction: true +body: | + bb.0: + liveins: $vgpr0, $sgpr0 + + ; GCN-LABEL: name: redef_m0_copy_physreg + ; GCN: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GCN: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr0 + ; GCN: $m0 = COPY $sgpr0 + ; GCN: [[DS_READ_B32_:%[0-9]+]]:vgpr_32 = DS_READ_B32 [[COPY]], 0, 0, implicit $m0, implicit $exec :: (load 4) + ; GCN: $sgpr0 = S_MOV_B32 0 + ; GCN: $m0 = COPY $sgpr0 + ; GCN: [[DS_READ_B32_1:%[0-9]+]]:vgpr_32 = DS_READ_B32 [[COPY]], 64, 0, implicit $m0, implicit $exec :: (load 4) + %0:vgpr_32 = COPY $vgpr0 + %1:sgpr_32 = COPY $sgpr0 + $m0 = COPY $sgpr0 + %2:vgpr_32 = DS_READ_B32 %0, 0, 0, implicit $m0, implicit $exec :: (load 4) + $sgpr0 = S_MOV_B32 0 + $m0 = COPY $sgpr0 + %3:vgpr_32 = DS_READ_B32 %0, 64, 0, implicit $m0, implicit $exec :: (load 4) + +... -- 2.40.0