From e3747985b52dfd5572a73e8cecaa43cb481c887b Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Mon, 20 May 2019 14:09:36 +0000 Subject: [PATCH] RegAlloc: Fix verifier error with undef identity copies The code did not match the example in the comment, and was checking the undef flag on the copy dest instead of source. The existing tests were only hitting the > 2 operands case. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@361156 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/CodeGen/VirtRegMap.cpp | 2 +- .../virtregrewrite-undef-identity-copy.mir | 69 +++++++++++++++++++ 2 files changed, 70 insertions(+), 1 deletion(-) create mode 100644 test/CodeGen/AMDGPU/virtregrewrite-undef-identity-copy.mir diff --git a/lib/CodeGen/VirtRegMap.cpp b/lib/CodeGen/VirtRegMap.cpp index d7f21399826..4a06704a887 100644 --- a/lib/CodeGen/VirtRegMap.cpp +++ b/lib/CodeGen/VirtRegMap.cpp @@ -384,7 +384,7 @@ void VirtRegRewriter::handleIdentityCopy(MachineInstr &MI) const { // give us additional liveness information: The target (super-)register // must not be valid before this point. Replace the COPY with a KILL // instruction to maintain this information. - if (MI.getOperand(0).isUndef() || MI.getNumOperands() > 2) { + if (MI.getOperand(1).isUndef() || MI.getNumOperands() > 2) { MI.setDesc(TII->get(TargetOpcode::KILL)); LLVM_DEBUG(dbgs() << " replace by: " << MI); return; diff --git a/test/CodeGen/AMDGPU/virtregrewrite-undef-identity-copy.mir b/test/CodeGen/AMDGPU/virtregrewrite-undef-identity-copy.mir new file mode 100644 index 00000000000..d54d2472079 --- /dev/null +++ b/test/CodeGen/AMDGPU/virtregrewrite-undef-identity-copy.mir @@ -0,0 +1,69 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +# RUN: llc -mtriple=amdgcn-amd-amdhsa -start-before=greedy -stop-after=virtregrewriter -verify-machineinstrs -o - %s | FileCheck %s + +# The undef copy of %4 is allocated to $vgpr3, and the identity copy +# was deleted, and $vgpr3 was considered undef. The code to replace +# the undef copy with a kill was incorrectly checking the dest +# operand, rather than the source. + +--- | + define amdgpu_kernel void @undef_identity_copy() { + ret void + } + + declare hidden float @bar(<4 x float>) + declare hidden void @foo() + +... +--- +name: undef_identity_copy +tracksRegLiveness: true +frameInfo: + maxAlignment: 4 + hasCalls: true +machineFunctionInfo: + isEntryFunction: true + scratchRSrcReg: '$sgpr0_sgpr1_sgpr2_sgpr3' + scratchWaveOffsetReg: '$sgpr95' + frameOffsetReg: '$sgpr95' + stackPtrOffsetReg: '$sgpr32' +body: | + bb.0: + ; CHECK-LABEL: name: undef_identity_copy + ; CHECK: renamable $vgpr32_vgpr33_vgpr34_vgpr35 = FLAT_LOAD_DWORDX4 undef renamable $vgpr0_vgpr1, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 16, addrspace 1) + ; CHECK: renamable $sgpr6_sgpr7 = SI_PC_ADD_REL_OFFSET target-flags(amdgpu-rel32-lo) @foo + 4, target-flags(amdgpu-rel32-hi) @foo + 4, implicit-def dead $scc + ; CHECK: ADJCALLSTACKUP 0, 0, implicit-def $sgpr32, implicit $sgpr32, implicit $sgpr95 + ; CHECK: $sgpr4 = COPY $sgpr95 + ; CHECK: dead $sgpr30_sgpr31 = SI_CALL killed renamable $sgpr6_sgpr7, @foo, csr_amdgpu_highregs, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4 + ; CHECK: ADJCALLSTACKDOWN 0, 4, implicit-def $sgpr32, implicit $sgpr32, implicit $sgpr95 + ; CHECK: renamable $sgpr6_sgpr7 = SI_PC_ADD_REL_OFFSET target-flags(amdgpu-rel32-lo) @bar + 4, target-flags(amdgpu-rel32-hi) @bar + 4, implicit-def dead $scc + ; CHECK: ADJCALLSTACKUP 0, 0, implicit-def $sgpr32, implicit $sgpr32, implicit $sgpr95 + ; CHECK: $sgpr4 = COPY $sgpr95 + ; CHECK: $vgpr0 = COPY renamable $vgpr32 + ; CHECK: $vgpr1 = COPY renamable $vgpr33 + ; CHECK: $vgpr2 = COPY renamable $vgpr34 + ; CHECK: $vgpr3 = KILL undef renamable $vgpr3 + ; CHECK: dead $sgpr30_sgpr31 = SI_CALL killed renamable $sgpr6_sgpr7, @bar, csr_amdgpu_highregs, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4, implicit $vgpr0, implicit killed $vgpr1, implicit killed $vgpr2, implicit killed $vgpr3, implicit-def $vgpr0 + ; CHECK: ADJCALLSTACKDOWN 0, 4, implicit-def $sgpr32, implicit $sgpr32, implicit $sgpr95 + ; CHECK: FLAT_STORE_DWORD undef renamable $vgpr0_vgpr1, killed renamable $vgpr0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4, addrspace 1) + ; CHECK: S_ENDPGM 0 + %0:vreg_128 = FLAT_LOAD_DWORDX4 undef %1:vreg_64, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 16, addrspace 1) + %2:sreg_64 = SI_PC_ADD_REL_OFFSET target-flags(amdgpu-rel32-lo) @foo + 4, target-flags(amdgpu-rel32-hi) @foo + 4, implicit-def dead $scc + ADJCALLSTACKUP 0, 0, implicit-def $sgpr32, implicit $sgpr32, implicit $sgpr95 + $sgpr4 = COPY $sgpr95 + dead $sgpr30_sgpr31 = SI_CALL %2, @foo, csr_amdgpu_highregs, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4 + ADJCALLSTACKDOWN 0, 4, implicit-def $sgpr32, implicit $sgpr32, implicit $sgpr95 + %3:sreg_64 = SI_PC_ADD_REL_OFFSET target-flags(amdgpu-rel32-lo) @bar + 4, target-flags(amdgpu-rel32-hi) @bar + 4, implicit-def dead $scc + ADJCALLSTACKUP 0, 0, implicit-def $sgpr32, implicit $sgpr32, implicit $sgpr95 + $sgpr4 = COPY $sgpr95 + $vgpr0 = COPY %0.sub0 + $vgpr1 = COPY %0.sub1 + $vgpr2 = COPY %0.sub2 + $vgpr3 = COPY undef %4:vgpr_32 + dead $sgpr30_sgpr31 = SI_CALL %3, @bar, csr_amdgpu_highregs, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4, implicit $vgpr0, implicit killed $vgpr1, implicit killed $vgpr2, implicit killed $vgpr3, implicit-def $vgpr0 + %5:vgpr_32 = COPY $vgpr0 + ADJCALLSTACKDOWN 0, 4, implicit-def $sgpr32, implicit $sgpr32, implicit $sgpr95 + FLAT_STORE_DWORD undef %6:vreg_64, %5, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4, addrspace 1) + S_ENDPGM 0 + +... -- 2.40.0