From 8672594561920f2f52303bc653760438e57f14e1 Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Fri, 18 Oct 2019 23:24:25 +0000 Subject: [PATCH] LiveIntervals: Fix handleMoveUp with subreg def moving across a def If a subregister def was moved across another subregister def and another use, the main range was not correctly updated. The end point of the moved interval ended too early and missed the use from the other lanes in the subreg def. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@375300 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/CodeGen/LiveIntervals.cpp | 17 ++- ...dleMoveUp-subreg-def-across-subreg-def.mir | 134 ++++++++++++++++++ unittests/MI/LiveIntervalTest.cpp | 40 ++++++ 3 files changed, 190 insertions(+), 1 deletion(-) create mode 100644 test/CodeGen/AMDGPU/sched-handleMoveUp-subreg-def-across-subreg-def.mir diff --git a/lib/CodeGen/LiveIntervals.cpp b/lib/CodeGen/LiveIntervals.cpp index 9d85fc3e08d..2989930ad09 100644 --- a/lib/CodeGen/LiveIntervals.cpp +++ b/lib/CodeGen/LiveIntervals.cpp @@ -1288,6 +1288,20 @@ private: const SlotIndex SplitPos = NewIdxDef; OldIdxVNI = OldIdxIn->valno; + SlotIndex NewDefEndPoint = std::next(NewIdxIn)->end; + LiveRange::iterator Prev = std::prev(OldIdxIn); + if (OldIdxIn != LR.begin() && + SlotIndex::isEarlierInstr(NewIdx, Prev->end)) { + // If the segment before OldIdx read a value defined earlier than + // NewIdx, the moved instruction also reads and forwards that + // value. Extend the lifetime of the new def point. + + // Extend to where the previous range started, unless there is + // another redef first. + NewDefEndPoint = std::min(OldIdxIn->start, + std::next(NewIdxOut)->start); + } + // Merge the OldIdxIn and OldIdxOut segments into OldIdxOut. OldIdxOut->valno->def = OldIdxIn->start; *OldIdxOut = LiveRange::Segment(OldIdxIn->start, OldIdxOut->end, @@ -1305,7 +1319,8 @@ private: // There is no gap between NewSegment and its predecessor. 
*NewSegment = LiveRange::Segment(Next->start, SplitPos, Next->valno); - *Next = LiveRange::Segment(SplitPos, Next->end, OldIdxVNI); + + *Next = LiveRange::Segment(SplitPos, NewDefEndPoint, OldIdxVNI); Next->valno->def = SplitPos; } else { // There is a gap between NewSegment and its predecessor diff --git a/test/CodeGen/AMDGPU/sched-handleMoveUp-subreg-def-across-subreg-def.mir b/test/CodeGen/AMDGPU/sched-handleMoveUp-subreg-def-across-subreg-def.mir new file mode 100644 index 00000000000..47c8e19d63c --- /dev/null +++ b/test/CodeGen/AMDGPU/sched-handleMoveUp-subreg-def-across-subreg-def.mir @@ -0,0 +1,134 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx906 -verify-machineinstrs -verify-misched -run-pass=machine-scheduler -o - %s | FileCheck %s + +--- +name: handleMoveUp_incorrect_interval +tracksRegLiveness: true +liveins: + - { reg: '$sgpr4_sgpr5', virtual-reg: '%0' } +frameInfo: + maxAlignment: 1 +machineFunctionInfo: + isEntryFunction: true + scratchRSrcReg: '$sgpr96_sgpr97_sgpr98_sgpr99' + scratchWaveOffsetReg: '$sgpr101' + frameOffsetReg: '$sgpr101' + stackPtrOffsetReg: '$sgpr101' + argumentInfo: + privateSegmentBuffer: { reg: '$sgpr0_sgpr1_sgpr2_sgpr3' } + kernargSegmentPtr: { reg: '$sgpr4_sgpr5' } + workGroupIDX: { reg: '$sgpr6' } + privateSegmentWaveByteOffset: { reg: '$sgpr7' } + workItemIDX: { reg: '$vgpr0' } +body: | + ; CHECK-LABEL: name: handleMoveUp_incorrect_interval + ; CHECK: bb.0: + ; CHECK: successors: %bb.1(0x80000000) + ; CHECK: liveins: $sgpr4_sgpr5 + ; CHECK: [[COPY:%[0-9]+]]:sgpr_64(p4) = COPY $sgpr4_sgpr5 + ; CHECK: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN undef %2:vgpr_32, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr101, 0, 0, 0, 0, 0, 0, implicit $exec :: (load 4, addrspace 5) + ; CHECK: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sgpr_64 = S_LOAD_DWORDX2_IMM [[COPY]](p4), 0, 0, 0 :: (dereferenceable invariant load 8, align 16, addrspace 4) + ; 
CHECK: [[S_MOV_B32_:%[0-9]+]]:sreg_32_xm0 = S_MOV_B32 5329 + ; CHECK: undef %5.sub0:vreg_64 = V_MOV_B32_e32 0, implicit $exec + ; CHECK: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec + ; CHECK: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -4, implicit $exec + ; CHECK: [[DEF:%[0-9]+]]:vreg_64 = IMPLICIT_DEF + ; CHECK: [[DEF1:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF + ; CHECK: [[DEF2:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF + ; CHECK: bb.1: + ; CHECK: successors: %bb.1(0x80000000) + ; CHECK: INLINEASM &"", 1, 851978, def dead %11 + ; CHECK: GLOBAL_STORE_DWORD undef %12:vreg_64, [[BUFFER_LOAD_DWORD_OFFEN]], 0, 0, 0, 0, implicit $exec :: (store 4, addrspace 1) + ; CHECK: [[DS_READ_B64_gfx9_:%[0-9]+]]:vreg_64 = DS_READ_B64_gfx9 undef %14:vgpr_32, 0, 0, implicit $exec :: (load 8, addrspace 3) + ; CHECK: INLINEASM &"def $0 $1", 1, 851978, def %15, 851978, def %16 + ; CHECK: [[DS_READ_B32_gfx9_:%[0-9]+]]:vgpr_32 = DS_READ_B32_gfx9 [[V_MOV_B32_e32_]], 0, 0, implicit $exec + ; CHECK: [[DS_READ_B32_gfx9_1:%[0-9]+]]:vgpr_32 = DS_READ_B32_gfx9 [[V_MOV_B32_e32_1]], 0, 0, implicit $exec + ; CHECK: [[DS_READ_B32_gfx9_2:%[0-9]+]]:vgpr_32 = DS_READ_B32_gfx9 undef %20:vgpr_32, 0, 0, implicit $exec + ; CHECK: INLINEASM &"def $0 $1", 1, 851978, def %21, 851978, def %22 + ; CHECK: [[DS_READ_B32_gfx9_3:%[0-9]+]]:vgpr_32 = DS_READ_B32_gfx9 [[V_MOV_B32_e32_1]], 0, 0, implicit $exec + ; CHECK: [[V_MOV_B32_e32_2:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec + ; CHECK: %5.sub1:vreg_64 = COPY [[V_MOV_B32_e32_]] + ; CHECK: [[V_ADD_U32_e32_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 1, [[DEF2]], implicit $exec + ; CHECK: [[V_CMP_GT_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_GT_U32_e64 64, [[V_ADD_U32_e32_]], implicit $exec + ; CHECK: [[DEF]].sub1:vreg_64 = COPY [[V_MOV_B32_e32_]] + ; CHECK: [[V_MOV_B32_e32_3:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec + ; CHECK: INLINEASM &"", 1, 851978, def dead [[V_MOV_B32_e32_2]], 851978, def dead [[V_MOV_B32_e32_3]], 851977, 
[[DS_READ_B64_gfx9_]].sub0, 2147483657, [[V_MOV_B32_e32_2]](tied-def 3), 2147549193, [[V_MOV_B32_e32_3]](tied-def 5), 851977, %15, 851977, %16, 851977, [[DS_READ_B32_gfx9_1]], 851977, [[DS_READ_B32_gfx9_]], 851977, [[DS_READ_B32_gfx9_3]], 851977, [[DS_READ_B32_gfx9_2]] + ; CHECK: DS_WRITE_B32_gfx9 undef %28:vgpr_32, %21, 0, 0, implicit $exec :: (store 4, addrspace 3) + ; CHECK: DS_WRITE_B32_gfx9 undef %29:vgpr_32, %22, 0, 0, implicit $exec :: (store 4, addrspace 3) + ; CHECK: DS_WRITE_B64_gfx9 undef %30:vgpr_32, %5, 0, 0, implicit $exec :: (store 8, addrspace 3) + ; CHECK: undef %31.sub1:vreg_64 = FLAT_LOAD_DWORD undef %32:vreg_64, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 4) + ; CHECK: [[V_MUL_LO_U32_:%[0-9]+]]:vgpr_32 = V_MUL_LO_U32 [[V_ADD_U32_e32_]], [[S_MOV_B32_]], implicit $exec + ; CHECK: [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, 0, 0, [[V_ADD_U32_e32_]], [[V_CMP_GT_U32_e64_]], implicit $exec + ; CHECK: [[V_SUB_U32_e32_:%[0-9]+]]:vgpr_32 = V_SUB_U32_e32 [[V_MUL_LO_U32_]], [[DEF1]], implicit $exec + ; CHECK: [[V_MUL_LO_U32_1:%[0-9]+]]:vgpr_32 = V_MUL_LO_U32 [[V_CNDMASK_B32_e64_]], [[S_MOV_B32_]], implicit $exec + ; CHECK: [[V_ADD_U32_e32_1:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 [[V_SUB_U32_e32_]], [[DEF]].sub0, implicit $exec + ; CHECK: [[V_SUB_U32_e32_1:%[0-9]+]]:vgpr_32 = V_SUB_U32_e32 [[V_MUL_LO_U32_1]], [[V_MUL_LO_U32_]], implicit $exec + ; CHECK: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[S_LOAD_DWORDX2_IMM]].sub1 + ; CHECK: [[DEF]].sub0:vreg_64 = V_ADD_U32_e32 [[V_SUB_U32_e32_1]], [[V_ADD_U32_e32_1]], implicit $exec + ; CHECK: undef %38.sub0:vreg_64, %39:sreg_64_xexec = V_ADD_I32_e64 [[S_LOAD_DWORDX2_IMM]].sub0, [[DEF]].sub0, 0, implicit $exec + ; CHECK: undef %40.sub1:vreg_64, dead %41:sreg_64_xexec = V_ADDC_U32_e64 [[COPY1]], [[DEF]].sub1, %39, 0, implicit $exec + ; CHECK: undef %42.sub0:sgpr_64 = V_READFIRSTLANE_B32 %38.sub0, implicit $exec + ; CHECK: %42.sub1:sgpr_64 = V_READFIRSTLANE_B32 %40.sub1, implicit $exec + ; CHECK: 
[[S_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM %42, 0, 0, 0 :: (load 4, addrspace 1) + ; CHECK: INLINEASM &"", 1 + ; CHECK: [[DS_READ_B32_gfx9_4:%[0-9]+]]:vgpr_32 = DS_READ_B32_gfx9 undef %45:vgpr_32, 0, 0, implicit $exec :: (load 4, addrspace 3) + ; CHECK: GLOBAL_STORE_DWORD undef %46:vreg_64, [[DS_READ_B32_gfx9_4]], 0, 0, 0, 0, implicit $exec :: (store 4, addrspace 1) + ; CHECK: %31.sub0:vreg_64 = COPY [[S_LOAD_DWORD_IMM]], implicit $exec + ; CHECK: DS_WRITE_B64_gfx9 undef %47:vgpr_32, %31, 0, 0, implicit $exec :: (store 8, addrspace 3) + ; CHECK: S_BRANCH %bb.1 + bb.0: + liveins: $sgpr4_sgpr5 + + %0:sgpr_64(p4) = COPY $sgpr4_sgpr5 + %1:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN undef %2:vgpr_32, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr101, 0, 0, 0, 0, 0, 0, implicit $exec :: (load 4, addrspace 5) + %3:sgpr_64 = S_LOAD_DWORDX2_IMM %0(p4), 0, 0, 0 :: (dereferenceable invariant load 8, align 16, addrspace 4) + %4:sreg_32_xm0 = S_MOV_B32 5329 + undef %5.sub0:vreg_64 = V_MOV_B32_e32 0, implicit $exec + %6:vgpr_32 = V_MOV_B32_e32 0, implicit $exec + %7:vgpr_32 = V_MOV_B32_e32 -4, implicit $exec + %8:vreg_64 = IMPLICIT_DEF + %9:vgpr_32 = IMPLICIT_DEF + %10:vgpr_32 = IMPLICIT_DEF + + bb.1: + INLINEASM &"", 1, 851978, def %11:vgpr_32 + GLOBAL_STORE_DWORD undef %12:vreg_64, %1, 0, 0, 0, 0, implicit $exec :: (store 4, addrspace 1) + %13:vreg_64 = DS_READ_B64_gfx9 undef %14:vgpr_32, 0, 0, implicit $exec :: (load 8, addrspace 3) + INLINEASM &"def $0 $1", 1, 851978, def %15:vgpr_32, 851978, def %16:vgpr_32 + %17:vgpr_32 = DS_READ_B32_gfx9 %6, 0, 0, implicit $exec + %18:vgpr_32 = DS_READ_B32_gfx9 %7, 0, 0, implicit $exec + %19:vgpr_32 = DS_READ_B32_gfx9 undef %20:vgpr_32, 0, 0, implicit $exec + INLINEASM &"def $0 $1", 1, 851978, def %21:vgpr_32, 851978, def %22:vgpr_32 + %23:vgpr_32 = DS_READ_B32_gfx9 %7, 0, 0, implicit $exec + %24:vgpr_32 = V_MOV_B32_e32 0, implicit $exec + %5.sub1:vreg_64 = COPY %6 + %25:vgpr_32 = V_ADD_U32_e32 1, %10, implicit $exec + %26:sreg_64_xexec 
= V_CMP_GT_U32_e64 64, %25, implicit $exec + %27:vgpr_32 = V_MOV_B32_e32 0, implicit $exec + INLINEASM &"", 1, 851978, def dead %24, 851978, def dead %27, 851977, %13.sub0, 2147483657, %24(tied-def 3), 2147549193, %27(tied-def 5), 851977, %15, 851977, %16, 851977, %18, 851977, %17, 851977, %23, 851977, %19 + DS_WRITE_B32_gfx9 undef %28:vgpr_32, %21, 0, 0, implicit $exec :: (store 4, addrspace 3) + DS_WRITE_B32_gfx9 undef %29:vgpr_32, %22, 0, 0, implicit $exec :: (store 4, addrspace 3) + DS_WRITE_B64_gfx9 undef %30:vgpr_32, %5, 0, 0, implicit $exec :: (store 8, addrspace 3) + undef %31.sub1:vreg_64 = FLAT_LOAD_DWORD undef %32:vreg_64, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 4) + %33:vgpr_32 = V_MUL_LO_U32 %25, %4, implicit $exec + %10:vgpr_32 = V_CNDMASK_B32_e64 0, 0, 0, %25, %26, implicit $exec + %34:vgpr_32 = V_SUB_U32_e32 %33, %9, implicit $exec + %9:vgpr_32 = V_MUL_LO_U32 %10, %4, implicit $exec + %35:vgpr_32 = V_ADD_U32_e32 %34, %8.sub0, implicit $exec + %36:vgpr_32 = V_SUB_U32_e32 %9, %33, implicit $exec + %37:vgpr_32 = COPY %3.sub1 + undef %8.sub0:vreg_64 = V_ADD_U32_e32 %36, %35, implicit $exec + %8.sub1:vreg_64 = COPY %6 + undef %38.sub0:vreg_64, %39:sreg_64_xexec = V_ADD_I32_e64 %3.sub0, %8.sub0, 0, implicit $exec + undef %40.sub1:vreg_64, dead %41:sreg_64_xexec = V_ADDC_U32_e64 %37, %8.sub1, %39, 0, implicit $exec + undef %42.sub0:sgpr_64 = V_READFIRSTLANE_B32 %38.sub0, implicit $exec + %42.sub1:sgpr_64 = V_READFIRSTLANE_B32 %40.sub1, implicit $exec + %43:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM %42, 0, 0, 0 :: (load 4, addrspace 1) + INLINEASM &"", 1 + %44:vgpr_32 = DS_READ_B32_gfx9 undef %45:vgpr_32, 0, 0, implicit $exec :: (load 4, addrspace 3) + GLOBAL_STORE_DWORD undef %46:vreg_64, %44, 0, 0, 0, 0, implicit $exec :: (store 4, addrspace 1) + %31.sub0:vreg_64 = COPY %43, implicit $exec + DS_WRITE_B64_gfx9 undef %47:vgpr_32, %31, 0, 0, implicit $exec :: (store 8, addrspace 3) + S_BRANCH %bb.1 + +... 
diff --git a/unittests/MI/LiveIntervalTest.cpp b/unittests/MI/LiveIntervalTest.cpp index c81f13f6e67..782df239dea 100644 --- a/unittests/MI/LiveIntervalTest.cpp +++ b/unittests/MI/LiveIntervalTest.cpp @@ -421,6 +421,46 @@ TEST(LiveIntervalTest, DeadSubRegMoveUp) { }); } +TEST(LiveIntervalTest, TestMoveSubRegDefAcrossUseDef) { + liveIntervalTest(R"MIR( + %1:vreg_64 = IMPLICIT_DEF + + bb.1: + %2:vgpr_32 = V_MOV_B32_e32 2, implicit $exec + %3:vgpr_32 = V_ADD_U32_e32 %2, %1.sub0, implicit $exec + undef %1.sub0:vreg_64 = V_ADD_U32_e32 %2, %2, implicit $exec + %1.sub1:vreg_64 = COPY %2 + S_NOP 0, implicit %1.sub1 + S_BRANCH %bb.1 + +)MIR", [](MachineFunction &MF, LiveIntervals &LIS) { + MachineInstr &UndefSubregDef = getMI(MF, 2, 1); + // The scheduler clears undef from subregister defs before moving + UndefSubregDef.getOperand(0).setIsUndef(false); + testHandleMove(MF, LIS, 3, 1, 1); + }); +} + +TEST(LiveIntervalTest, TestMoveSubRegDefAcrossUseDefMulti) { + liveIntervalTest(R"MIR( + %1:vreg_96 = IMPLICIT_DEF + + bb.1: + %2:vgpr_32 = V_MOV_B32_e32 2, implicit $exec + %3:vgpr_32 = V_ADD_U32_e32 %2, %1.sub0, implicit $exec + undef %1.sub0:vreg_96 = V_ADD_U32_e32 %2, %2, implicit $exec + %1.sub1:vreg_96 = COPY %2 + %1.sub2:vreg_96 = COPY %2 + S_NOP 0, implicit %1.sub1, implicit %1.sub2 + S_BRANCH %bb.1 + +)MIR", [](MachineFunction &MF, LiveIntervals &LIS) { + MachineInstr &UndefSubregDef = getMI(MF, 2, 1); + // The scheduler clears undef from subregister defs before moving + UndefSubregDef.getOperand(0).setIsUndef(false); + testHandleMove(MF, LIS, 4, 1, 1); + }); +} int main(int argc, char **argv) { ::testing::InitGoogleTest(&argc, argv); initLLVM(); -- 2.40.0