From: Matt Arsenault
Date: Wed, 14 Sep 2016 15:51:33 +0000 (+0000)
Subject: AMDGPU: Support folding FrameIndex operands
X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=0f7125844e6bb8ba04df71664a081e43b7566667;p=llvm

AMDGPU: Support folding FrameIndex operands

This avoids test regressions in a future commit.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@281491 91177308-0d34-0410-b5e6-96231b3b80d8
---

diff --git a/lib/Target/AMDGPU/SIFoldOperands.cpp b/lib/Target/AMDGPU/SIFoldOperands.cpp
index b55dee68d51..e1257b1d33e 100644
--- a/lib/Target/AMDGPU/SIFoldOperands.cpp
+++ b/lib/Target/AMDGPU/SIFoldOperands.cpp
@@ -48,24 +48,36 @@ public:
 
 struct FoldCandidate {
   MachineInstr *UseMI;
-  unsigned UseOpNo;
-  MachineOperand *OpToFold;
-  uint64_t ImmToFold;
+  union {
+    MachineOperand *OpToFold;
+    uint64_t ImmToFold;
+    int FrameIndexToFold;
+  };
+  unsigned char UseOpNo;
+  MachineOperand::MachineOperandType Kind;
 
   FoldCandidate(MachineInstr *MI, unsigned OpNo, MachineOperand *FoldOp) :
-    UseMI(MI), UseOpNo(OpNo) {
-
+    UseMI(MI), OpToFold(nullptr), UseOpNo(OpNo), Kind(FoldOp->getType()) {
     if (FoldOp->isImm()) {
-      OpToFold = nullptr;
       ImmToFold = FoldOp->getImm();
+    } else if (FoldOp->isFI()) {
+      FrameIndexToFold = FoldOp->getIndex();
     } else {
       assert(FoldOp->isReg());
       OpToFold = FoldOp;
     }
   }
 
+  bool isFI() const {
+    return Kind == MachineOperand::MO_FrameIndex;
+  }
+
   bool isImm() const {
-    return !OpToFold;
+    return Kind == MachineOperand::MO_Immediate;
+  }
+
+  bool isReg() const {
+    return Kind == MachineOperand::MO_Register;
   }
 };
 
@@ -107,6 +119,11 @@ static bool updateOperand(FoldCandidate &Fold,
     return true;
   }
 
+  if (Fold.isFI()) {
+    Old.ChangeToFrameIndex(Fold.FrameIndexToFold);
+    return true;
+  }
+
   MachineOperand *New = Fold.OpToFold;
   if (TargetRegisterInfo::isVirtualRegister(Old.getReg()) &&
       TargetRegisterInfo::isVirtualRegister(New->getReg())) {
@@ -448,7 +465,7 @@ bool SIFoldOperands::runOnMachineFunction(MachineFunction &MF) {
 
       unsigned OpSize = TII->getOpSize(MI, 1);
       MachineOperand &OpToFold = MI.getOperand(1);
-      bool FoldingImm = OpToFold.isImm();
+      bool FoldingImm = OpToFold.isImm() || OpToFold.isFI();
 
       // FIXME: We could also be folding things like FrameIndexes and
       // TargetIndexes.
@@ -500,7 +517,7 @@ bool SIFoldOperands::runOnMachineFunction(MachineFunction &MF) {
       for (FoldCandidate &Fold : FoldList) {
         if (updateOperand(Fold, TRI)) {
           // Clear kill flags.
-          if (!Fold.isImm()) {
+          if (Fold.isReg()) {
             assert(Fold.OpToFold && Fold.OpToFold->isReg());
             // FIXME: Probably shouldn't bother trying to fold if not an
             // SGPR. PeepholeOptimizer can eliminate redundant VGPR->VGPR
diff --git a/test/CodeGen/AMDGPU/local-stack-slot-bug.ll b/test/CodeGen/AMDGPU/local-stack-slot-bug.ll
index 6e6f289f5d6..eb554e21730 100644
--- a/test/CodeGen/AMDGPU/local-stack-slot-bug.ll
+++ b/test/CodeGen/AMDGPU/local-stack-slot-bug.ll
@@ -7,10 +7,8 @@
 ;
 ; CHECK-LABEL: {{^}}main:
 ; CHECK: v_lshlrev_b32_e32 [[BYTES:v[0-9]+]], 2, v0
-; CHECK: v_mov_b32_e32 [[HI_CONST:v[0-9]+]], 0x200
-; CHECK: v_mov_b32_e32 [[LO_CONST:v[0-9]+]], 0
-; CHECK: v_add_i32_e32 [[HI_OFF:v[0-9]+]], vcc, [[BYTES]], [[HI_CONST]]
-; CHECK: v_add_i32_e32 [[LO_OFF:v[0-9]+]], vcc, [[BYTES]], [[LO_CONST]]
+; CHECK: v_add_i32_e32 [[HI_OFF:v[0-9]+]], vcc, 0x200, [[BYTES]]
+; CHECK: v_add_i32_e32 [[LO_OFF:v[0-9]+]], vcc, 0, [[BYTES]]
 ; CHECK: buffer_load_dword {{v[0-9]+}}, [[LO_OFF]], {{s\[[0-9]+:[0-9]+\]}}, {{s[0-9]+}} offen
 ; CHECK: buffer_load_dword {{v[0-9]+}}, [[HI_OFF]], {{s\[[0-9]+:[0-9]+\]}}, {{s[0-9]+}} offen
 define amdgpu_ps float @main(i32 %idx) {
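
Note: the core of the change is that FoldCandidate no longer encodes "this is an immediate" as OpToFold == nullptr; it instead carries an explicit operand-kind discriminator over a union, which is what lets a third kind, frame indexes, be represented and folded by updateOperand. The following is a minimal standalone sketch of that tagged-union pattern, not LLVM code: Operand, OperandKind, and the member names are invented stand-ins for MachineOperand and its kinds.

// Sketch of the discriminated-union pattern the patch adopts for FoldCandidate.
// All types and names here are illustrative only; they are not LLVM APIs.
#include <cassert>
#include <cstdint>
#include <iostream>

enum class OperandKind { Register, Immediate, FrameIndex };

struct Operand {
  OperandKind Kind = OperandKind::Register;
  int Reg = 0;       // meaningful when Kind == Register
  uint64_t Imm = 0;  // meaningful when Kind == Immediate
  int FI = 0;        // meaningful when Kind == FrameIndex
};

struct FoldCandidate {
  // Exactly one member is live, selected by Kind (the patch keys its union
  // off MachineOperand::MachineOperandType in the same way).
  union {
    const Operand *OpToFold;  // register operand to fold
    uint64_t ImmToFold;       // immediate value to fold
    int FrameIndexToFold;     // frame index to fold
  };
  OperandKind Kind;

  explicit FoldCandidate(const Operand *FoldOp)
      : OpToFold(nullptr), Kind(FoldOp->Kind) {
    if (FoldOp->Kind == OperandKind::Immediate)
      ImmToFold = FoldOp->Imm;
    else if (FoldOp->Kind == OperandKind::FrameIndex)
      FrameIndexToFold = FoldOp->FI;
    else
      OpToFold = FoldOp;
  }

  bool isReg() const { return Kind == OperandKind::Register; }
  bool isImm() const { return Kind == OperandKind::Immediate; }
  bool isFI() const { return Kind == OperandKind::FrameIndex; }
};

int main() {
  Operand FrameOp;
  FrameOp.Kind = OperandKind::FrameIndex;
  FrameOp.FI = 3;

  FoldCandidate Fold(&FrameOp);
  assert(Fold.isFI() && !Fold.isImm() && !Fold.isReg());
  std::cout << "folding frame index " << Fold.FrameIndexToFold << "\n";
  return 0;
}

Keeping the discriminator outside the union means each is*() query is a single comparison, and supporting a further operand kind only requires a new union member plus a new predicate, rather than overloading the meaning of a null pointer.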