#include "PPCInstrBuilder.h"
#include "PPCInstrInfo.h"
#include "PPCTargetMachine.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/CodeGen/MachineDominators.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#define DEBUG_TYPE "ppc-mi-peepholes"
+STATISTIC(NumOptADDLIs, "Number of optimized ADD instruction fed by LI");
+
namespace llvm {
void initializePPCMIPeepholePass(PassRegistry&);
}
}
private:
+ MachineDominatorTree *MDT;
+
// Initialize class variables.
void initialize(MachineFunction &MFParm);
unsigned lookThruCopyLike(unsigned SrcReg);
public:
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.addRequired<MachineDominatorTree>();
+ AU.addPreserved<MachineDominatorTree>();
+ MachineFunctionPass::getAnalysisUsage(AU);
+ }
+
// Main entry point for this pass.
bool runOnMachineFunction(MachineFunction &MF) override {
if (skipFunction(*MF.getFunction()))
void PPCMIPeephole::initialize(MachineFunction &MFParm) {
MF = &MFParm;
MRI = &MF->getRegInfo();
+ MDT = &getAnalysis<MachineDominatorTree>();
TII = MF->getSubtarget<PPCSubtarget>().getInstrInfo();
DEBUG(dbgs() << "*** PowerPC MI peephole pass ***\n\n");
DEBUG(MF->dump());
}
+static MachineInstr *getVRegDefOrNull(MachineOperand *Op,
+ MachineRegisterInfo *MRI) {
+ assert(Op && "Invalid Operand!");
+ if (!Op->isReg())
+ return nullptr;
+
+ unsigned Reg = Op->getReg();
+ if (!TargetRegisterInfo::isVirtualRegister(Reg))
+ return nullptr;
+
+ return MRI->getVRegDef(Reg);
+}
+
// Perform peephole optimizations.
bool PPCMIPeephole::simplifyCode(void) {
bool Simplified = false;
}
break;
}
+
+ // TODO: Any instruction that has an immediate form fed only by a PHI
+ // whose operands are all load immediate can be folded away. We currently
+ // do this for ADD instructions, but should expand it to arithmetic and
+ // binary instructions with immediate forms in the future.
+ case PPC::ADD4:
+ case PPC::ADD8: {
+ auto isSingleUsePHI = [&](MachineOperand *PhiOp) {
+ assert(PhiOp && "Invalid Operand!");
+ MachineInstr *DefPhiMI = getVRegDefOrNull(PhiOp, MRI);
+
+ return DefPhiMI && (DefPhiMI->getOpcode() == PPC::PHI) &&
+ MRI->hasOneNonDBGUse(DefPhiMI->getOperand(0).getReg());
+ };
+
+ auto dominatesAllSingleUseLIs = [&](MachineOperand *DominatorOp,
+ MachineOperand *PhiOp) {
+ assert(PhiOp && "Invalid Operand!");
+ assert(DominatorOp && "Invalid Operand!");
+ MachineInstr *DefPhiMI = getVRegDefOrNull(PhiOp, MRI);
+ MachineInstr *DefDomMI = getVRegDefOrNull(DominatorOp, MRI);
+
+ // Note: the vregs only show up at odd indices position of PHI Node,
+ // the even indices position save the BB info.
+ for (unsigned i = 1; i < DefPhiMI->getNumOperands(); i += 2) {
+ MachineInstr *LiMI =
+ getVRegDefOrNull(&DefPhiMI->getOperand(i), MRI);
+ if (!LiMI || !MRI->hasOneNonDBGUse(LiMI->getOperand(0).getReg()) ||
+ !MDT->dominates(DefDomMI, LiMI) ||
+ (LiMI->getOpcode() != PPC::LI && LiMI->getOpcode() != PPC::LI8))
+ return false;
+ }
+
+ return true;
+ };
+
+ MachineOperand Op1 = MI.getOperand(1);
+ MachineOperand Op2 = MI.getOperand(2);
+ if (isSingleUsePHI(&Op2) && dominatesAllSingleUseLIs(&Op1, &Op2))
+ std::swap(Op1, Op2);
+ else if (!isSingleUsePHI(&Op1) || !dominatesAllSingleUseLIs(&Op2, &Op1))
+ break; // We don't have an ADD fed by LI's that can be transformed
+
+ // Now we know that Op1 is the PHI node and Op2 is the dominator
+ unsigned DominatorReg = Op2.getReg();
+
+ const TargetRegisterClass *TRC = MI.getOpcode() == PPC::ADD8
+ ? &PPC::G8RC_and_G8RC_NOX0RegClass
+ : &PPC::GPRC_and_GPRC_NOR0RegClass;
+ MRI->setRegClass(DominatorReg, TRC);
+
+ // replace LIs with ADDIs
+ MachineInstr *DefPhiMI = getVRegDefOrNull(&Op1, MRI);
+ for (unsigned i = 1; i < DefPhiMI->getNumOperands(); i += 2) {
+ MachineInstr *LiMI = getVRegDefOrNull(&DefPhiMI->getOperand(i), MRI);
+ DEBUG(dbgs() << "Optimizing LI to ADDI: ");
+ DEBUG(LiMI->dump());
+
+ // There could be repeated registers in the PHI, e.g: %vreg1<def> =
+ // PHI %vreg6, <BB#2>, %vreg8, <BB#3>, %vreg8, <BB#6>; So if we've
+ // already replaced the def instruction, skip.
+ if (LiMI->getOpcode() == PPC::ADDI || LiMI->getOpcode() == PPC::ADDI8)
+ continue;
+
+ assert((LiMI->getOpcode() == PPC::LI ||
+ LiMI->getOpcode() == PPC::LI8) &&
+ "Invalid Opcode!");
+ auto LiImm = LiMI->getOperand(1).getImm(); // save the imm of LI
+ LiMI->RemoveOperand(1); // remove the imm of LI
+ LiMI->setDesc(TII->get(LiMI->getOpcode() == PPC::LI ? PPC::ADDI
+ : PPC::ADDI8));
+ MachineInstrBuilder(*LiMI->getParent()->getParent(), *LiMI)
+ .addReg(DominatorReg)
+ .addImm(LiImm); // restore the imm of LI
+ DEBUG(LiMI->dump());
+ }
+
+ // Replace ADD with COPY
+ DEBUG(dbgs() << "Optimizing ADD to COPY: ");
+ DEBUG(MI.dump());
+ BuildMI(MBB, &MI, MI.getDebugLoc(), TII->get(PPC::COPY),
+ MI.getOperand(0).getReg())
+ .add(Op1);
+ ToErase = &MI;
+ Simplified = true;
+ NumOptADDLIs++;
+ break;
+ }
}
}
+
// If the last instruction was marked for elimination,
// remove it now.
if (ToErase) {
--- /dev/null
+; RUN: llc -verify-machineinstrs -mtriple=powerpc64-unknown-linux-gnu -mcpu=pwr8 < %s | FileCheck %s
+; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu -mcpu=pwr8 < %s | FileCheck %s
+
+define i64 @testOptimizeLiAddToAddi(i64 %a) {
+; CHECK-LABEL: testOptimizeLiAddToAddi:
+; CHECK: addi 3, 30, 2444
+; CHECK: bl callv
+; CHECK: addi 3, 30, 234
+; CHECK: bl call
+; CHECK: blr
+entry:
+ %cmp = icmp sgt i64 %a, 33
+ br i1 %cmp, label %if.then, label %if.end
+
+if.then:
+ tail call void bitcast (void (...)* @callv to void ()*)()
+ br label %if.end
+
+if.end:
+ %add.0 = phi i64 [ 234, %if.then ], [ 2444, %entry ]
+ %add2 = add nsw i64 %add.0, %a
+ %call = tail call i64 @call(i64 %add2)
+ ret i64 %call
+}
+
+define i64 @testThreePhiIncomings(i64 %a) {
+; CHECK-LABEL: testThreePhiIncomings:
+; CHECK: bl callv
+; CHECK: addi 3, 30, 234
+; CHECK: addi 3, 30, 2444
+; CHECK: bl callv
+; CHECK: addi 3, 30, 345
+; CHECK: bl call
+; CHECK: blr
+entry:
+ %cmp = icmp slt i64 %a, 33
+ br i1 %cmp, label %if.then, label %if.else
+
+if.then:
+ tail call void bitcast (void (...)* @callv to void ()*)()
+ br label %if.end4
+
+if.else:
+ %cmp1 = icmp slt i64 %a, 55
+ br i1 %cmp1, label %if.then2, label %if.end4
+
+if.then2:
+ tail call void bitcast (void (...)* @callv to void ()*)()
+ br label %if.end4
+
+if.end4:
+ %add.0 = phi i64 [ 234, %if.then ], [ 345, %if.then2 ], [ 2444, %if.else ]
+ %add5 = add nsw i64 %add.0, %a
+ %call = tail call i64 @call(i64 %add5)
+ ret i64 %call
+}
+
+declare void @callv(...)
+
+declare i64 @call(i64)