struct FoldCandidate {
MachineInstr *UseMI;
- unsigned UseOpNo;
- MachineOperand *OpToFold;
- uint64_t ImmToFold;
+ union {
+ MachineOperand *OpToFold;
+ uint64_t ImmToFold;
+ int FrameIndexToFold;
+ };
+ unsigned char UseOpNo;
+ MachineOperand::MachineOperandType Kind;
FoldCandidate(MachineInstr *MI, unsigned OpNo, MachineOperand *FoldOp) :
- UseMI(MI), UseOpNo(OpNo) {
-
+ UseMI(MI), OpToFold(nullptr), UseOpNo(OpNo), Kind(FoldOp->getType()) {
if (FoldOp->isImm()) {
- OpToFold = nullptr;
ImmToFold = FoldOp->getImm();
+ } else if (FoldOp->isFI()) {
+ FrameIndexToFold = FoldOp->getIndex();
} else {
assert(FoldOp->isReg());
OpToFold = FoldOp;
}
}
+ bool isFI() const {
+ return Kind == MachineOperand::MO_FrameIndex;
+ }
+
bool isImm() const {
- return !OpToFold;
+ return Kind == MachineOperand::MO_Immediate;
+ }
+
+ bool isReg() const {
+ return Kind == MachineOperand::MO_Register;
}
};
return true;
}
+ if (Fold.isFI()) {
+ Old.ChangeToFrameIndex(Fold.FrameIndexToFold);
+ return true;
+ }
+
MachineOperand *New = Fold.OpToFold;
if (TargetRegisterInfo::isVirtualRegister(Old.getReg()) &&
TargetRegisterInfo::isVirtualRegister(New->getReg())) {
unsigned OpSize = TII->getOpSize(MI, 1);
MachineOperand &OpToFold = MI.getOperand(1);
- bool FoldingImm = OpToFold.isImm();
+ bool FoldingImm = OpToFold.isImm() || OpToFold.isFI();
// FIXME: We could also be folding things like FrameIndexes and
// TargetIndexes.
for (FoldCandidate &Fold : FoldList) {
if (updateOperand(Fold, TRI)) {
// Clear kill flags.
- if (!Fold.isImm()) {
+ if (Fold.isReg()) {
assert(Fold.OpToFold && Fold.OpToFold->isReg());
// FIXME: Probably shouldn't bother trying to fold if not an
// SGPR. PeepholeOptimizer can eliminate redundant VGPR->VGPR
;
; CHECK-LABEL: {{^}}main:
; CHECK: v_lshlrev_b32_e32 [[BYTES:v[0-9]+]], 2, v0
-; CHECK: v_mov_b32_e32 [[HI_CONST:v[0-9]+]], 0x200
-; CHECK: v_mov_b32_e32 [[LO_CONST:v[0-9]+]], 0
-; CHECK: v_add_i32_e32 [[HI_OFF:v[0-9]+]], vcc, [[BYTES]], [[HI_CONST]]
-; CHECK: v_add_i32_e32 [[LO_OFF:v[0-9]+]], vcc, [[BYTES]], [[LO_CONST]]
+; CHECK: v_add_i32_e32 [[HI_OFF:v[0-9]+]], vcc, 0x200, [[BYTES]]
+; CHECK: v_add_i32_e32 [[LO_OFF:v[0-9]+]], vcc, 0, [[BYTES]]
; CHECK: buffer_load_dword {{v[0-9]+}}, [[LO_OFF]], {{s\[[0-9]+:[0-9]+\]}}, {{s[0-9]+}} offen
; CHECK: buffer_load_dword {{v[0-9]+}}, [[HI_OFF]], {{s\[[0-9]+:[0-9]+\]}}, {{s[0-9]+}} offen
define amdgpu_ps float @main(i32 %idx) {