From: Matt Arsenault Date: Thu, 28 Feb 2019 00:01:05 +0000 (+0000) Subject: GlobalISel: Implement moreElementsVector for phi X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=0d2ad48b33165c74099ce5833c4390bcb9213f8c;p=llvm GlobalISel: Implement moreElementsVector for phi git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@355047 91177308-0d34-0410-b5e6-96231b3b80d8 --- diff --git a/include/llvm/CodeGen/GlobalISel/LegalizerHelper.h b/include/llvm/CodeGen/GlobalISel/LegalizerHelper.h index 939955a9fdd..66c66425691 100644 --- a/include/llvm/CodeGen/GlobalISel/LegalizerHelper.h +++ b/include/llvm/CodeGen/GlobalISel/LegalizerHelper.h @@ -186,6 +186,9 @@ private: LegalizeResult fewerElementsVectorSelect(MachineInstr &MI, unsigned TypeIdx, LLT NarrowTy); + LegalizeResult moreElementsVectorPhi(MachineInstr &MI, unsigned TypeIdx, + LLT MoreTy); + LegalizeResult reduceLoadStoreWidth(MachineInstr &MI, unsigned TypeIdx, LLT NarrowTy); diff --git a/lib/CodeGen/GlobalISel/LegalizerHelper.cpp b/lib/CodeGen/GlobalISel/LegalizerHelper.cpp index 67f90cd2ab3..8491763d991 100644 --- a/lib/CodeGen/GlobalISel/LegalizerHelper.cpp +++ b/lib/CodeGen/GlobalISel/LegalizerHelper.cpp @@ -2392,6 +2392,25 @@ LegalizerHelper::narrowScalarShift(MachineInstr &MI, unsigned TypeIdx, return Legalized; } +LegalizerHelper::LegalizeResult +LegalizerHelper::moreElementsVectorPhi(MachineInstr &MI, unsigned TypeIdx, + LLT MoreTy) { + assert(TypeIdx == 0 && "Expecting only Idx 0"); + + Observer.changingInstr(MI); + for (unsigned I = 1, E = MI.getNumOperands(); I != E; I += 2) { + MachineBasicBlock &OpMBB = *MI.getOperand(I + 1).getMBB(); + MIRBuilder.setInsertPt(OpMBB, OpMBB.getFirstTerminator()); + moreElementsVectorSrc(MI, MoreTy, I); + } + + MachineBasicBlock &MBB = *MI.getParent(); + MIRBuilder.setInsertPt(MBB, --MBB.getFirstNonPHI()); + moreElementsVectorDst(MI, MoreTy, 0); + Observer.changedInstr(MI); + return Legalized; +} + LegalizerHelper::LegalizeResult LegalizerHelper::moreElementsVector(MachineInstr &MI, unsigned TypeIdx, LLT MoreTy) { @@ -2441,6 +2460,8 @@ LegalizerHelper::moreElementsVector(MachineInstr &MI, unsigned TypeIdx, moreElementsVectorDst(MI, MoreTy, 0); Observer.changedInstr(MI); return Legalized; + case TargetOpcode::G_PHI: + return moreElementsVectorPhi(MI, TypeIdx, MoreTy); default: return UnableToLegalize; } diff --git a/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp b/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp index 3b66b31ce44..d83daba16ae 100644 --- a/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp +++ b/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp @@ -159,6 +159,7 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST, .legalFor(AddrSpaces32) .clampScalar(0, S32, S256) .widenScalarToNextPow2(0, 32) + .moreElementsIf(isSmallOddVector(0), oneMoreElement(0)) .legalIf(isPointer(0)); diff --git a/test/CodeGen/AMDGPU/GlobalISel/legalize-phi.mir b/test/CodeGen/AMDGPU/GlobalISel/legalize-phi.mir index a4a086b1978..30bd542da8a 100644 --- a/test/CodeGen/AMDGPU/GlobalISel/legalize-phi.mir +++ b/test/CodeGen/AMDGPU/GlobalISel/legalize-phi.mir @@ -102,7 +102,80 @@ body: | $vgpr0 = COPY %6 S_SETPC_B64 undef $sgpr30_sgpr31 ... + --- +name: test_phi_v3s16 +tracksRegLiveness: true + +body: | + ; CHECK-LABEL: name: test_phi_v3s16 + ; CHECK: bb.0: + ; CHECK: successors: %bb.1(0x40000000), %bb.2(0x40000000) + ; CHECK: liveins: $vgpr0_vgpr1, $vgpr2 + ; CHECK: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1 + ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; CHECK: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[C]] + ; CHECK: [[EXTRACT:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[COPY]](<4 x s16>), 0 + ; CHECK: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF + ; CHECK: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF]], [[EXTRACT]](<3 x s16>), 0 + ; CHECK: G_BRCOND [[ICMP]](s1), %bb.1 + ; CHECK: G_BR %bb.2 + ; CHECK: bb.1: + ; CHECK: successors: %bb.2(0x80000000) + ; CHECK: [[UV:%[0-9]+]]:_(s16), [[UV1:%[0-9]+]]:_(s16), [[UV2:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[EXTRACT]](<3 x s16>) + ; CHECK: [[UV3:%[0-9]+]]:_(s16), [[UV4:%[0-9]+]]:_(s16), [[UV5:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[EXTRACT]](<3 x s16>) + ; CHECK: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[UV]](s16) + ; CHECK: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[UV3]](s16) + ; CHECK: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[ANYEXT]], [[ANYEXT1]] + ; CHECK: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[ADD]](s32) + ; CHECK: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[UV1]](s16) + ; CHECK: [[ANYEXT3:%[0-9]+]]:_(s32) = G_ANYEXT [[UV4]](s16) + ; CHECK: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[ANYEXT2]], [[ANYEXT3]] + ; CHECK: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[ADD1]](s32) + ; CHECK: [[ANYEXT4:%[0-9]+]]:_(s32) = G_ANYEXT [[UV2]](s16) + ; CHECK: [[ANYEXT5:%[0-9]+]]:_(s32) = G_ANYEXT [[UV5]](s16) + ; CHECK: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[ANYEXT4]], [[ANYEXT5]] + ; CHECK: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[ADD2]](s32) + ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16), [[TRUNC2]](s16) + ; CHECK: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF + ; CHECK: [[INSERT1:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[BUILD_VECTOR]](<3 x s16>), 0 + ; CHECK: G_BR %bb.2 + ; CHECK: bb.2: + ; CHECK: [[PHI:%[0-9]+]]:_(<4 x s16>) = G_PHI [[INSERT]](<4 x s16>), %bb.0, [[INSERT1]](<4 x s16>), %bb.1 + ; CHECK: [[EXTRACT1:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[PHI]](<4 x s16>), 0 + ; CHECK: [[DEF2:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF + ; CHECK: [[INSERT2:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF2]], [[EXTRACT1]](<3 x s16>), 0 + ; CHECK: $vgpr0_vgpr1 = COPY [[INSERT2]](<4 x s16>) + ; CHECK: S_SETPC_B64 undef $sgpr30_sgpr31 + bb.0: + successors: %bb.1, %bb.2 + liveins: $vgpr0_vgpr1, $vgpr2 + + %0:_(<4 x s16>) = COPY $vgpr0_vgpr1 + %1:_(s32) = COPY $vgpr2 + %2:_(s32) = G_CONSTANT i32 0 + %3:_(s1) = G_ICMP intpred(eq), %1, %2 + %4:_(<3 x s16>) = G_EXTRACT %0, 0 + G_BRCOND %3, %bb.1 + G_BR %bb.2 + + bb.1: + successors: %bb.2 + + %5:_(<3 x s16>) = G_ADD %4, %4 + G_BR %bb.2 + + bb.2: + %6:_(<3 x s16>) = G_PHI %4, %bb.0, %5, %bb.1 + %7:_(<4 x s16>) = G_IMPLICIT_DEF + %8:_(<4 x s16>) = G_INSERT %7, %6, 0 + $vgpr0_vgpr1 = COPY %8 + S_SETPC_B64 undef $sgpr30_sgpr31 +... + +--- + name: test_phi_v4s16 tracksRegLiveness: true