/// Record \p mii as the Minimum Initiation Interval used for this schedule
/// attempt.
void setMII(unsigned mii) {
  MII = mii;
}
- MachineInstr *applyInstrChange(MachineInstr *MI, SMSchedule &Schedule,
- bool UpdateDAG = false);
+ void applyInstrChange(MachineInstr *MI, SMSchedule &Schedule);
+
+ void fixupRegisterOverlaps(std::deque<SUnit *> &Instrs);
/// Return the new base register that was stored away for the changed
/// instruction.
/// Apply changes to the instruction if needed. The changes are needed
/// to improve the scheduling and depend upon the final schedule.
-MachineInstr *SwingSchedulerDAG::applyInstrChange(MachineInstr *MI,
- SMSchedule &Schedule,
- bool UpdateDAG) {
+void SwingSchedulerDAG::applyInstrChange(MachineInstr *MI,
+ SMSchedule &Schedule) {
SUnit *SU = getSUnit(MI);
DenseMap<SUnit *, std::pair<unsigned, int64_t>>::iterator It =
InstrChanges.find(SU);
std::pair<unsigned, int64_t> RegAndOffset = It->second;
unsigned BasePos, OffsetPos;
if (!TII->getBaseAndOffsetPosition(*MI, BasePos, OffsetPos))
- return nullptr;
+ return;
unsigned BaseReg = MI->getOperand(BasePos).getReg();
MachineInstr *LoopDef = findDefInLoop(BaseReg);
int DefStageNum = Schedule.stageScheduled(getSUnit(LoopDef));
int64_t NewOffset =
MI->getOperand(OffsetPos).getImm() + RegAndOffset.second * OffsetDiff;
NewMI->getOperand(OffsetPos).setImm(NewOffset);
- if (UpdateDAG) {
- SU->setInstr(NewMI);
- MISUnitMap[NewMI] = SU;
- }
+ SU->setInstr(NewMI);
+ MISUnitMap[NewMI] = SU;
NewMIs.insert(NewMI);
- return NewMI;
}
}
- return nullptr;
}
/// Return true for an order dependence that is loop carried potentially.
return true;
}
+/// Attempt to fix the degenerate cases when the instruction serialization
+/// causes the register lifetimes to overlap. For example,
+///   p' = store_pi(p, b)
+///      = load p, offset
+/// In this case p and p' overlap, which means that two registers are needed.
+/// Instead, this function changes the load to use p' and updates the offset.
+///
+/// \param Instrs the instructions to scan, visited front to back. The
+/// instructions are modified in place; the container itself is not changed.
+void SwingSchedulerDAG::fixupRegisterOverlaps(std::deque<SUnit *> &Instrs) {
+  // Register number 0 doubles as the "no overlap is pending" marker.
+  unsigned OverlapReg = 0;
+  unsigned NewBaseReg = 0;
+  for (SUnit *SU : Instrs) {
+    MachineInstr *MI = SU->getInstr();
+    for (unsigned i = 0, e = MI->getNumOperands(); i < e; ++i) {
+      const MachineOperand &MO = MI->getOperand(i);
+      // Look for an instruction that uses p. The instruction occurs in the
+      // same cycle but occurs later in the serialized order. The test is only
+      // meaningful once a tied definition has been recorded; without the
+      // OverlapReg check a use operand that holds register 0 would match the
+      // marker, end the operand scan early, and could get its base register
+      // rewritten to register 0.
+      if (OverlapReg != 0 && MO.isReg() && MO.isUse() &&
+          MO.getReg() == OverlapReg) {
+        // Check that the instruction appears in the InstrChanges structure,
+        // which contains instructions that can have the offset updated.
+        auto It = InstrChanges.find(SU);
+        if (It != InstrChanges.end()) {
+          unsigned BasePos, OffsetPos;
+          // Update the base register and adjust the offset.
+          // NOTE(review): the base operand is rewritten even when the matched
+          // use of p is a different operand - confirm that cannot happen for
+          // instructions recorded in InstrChanges.
+          if (TII->getBaseAndOffsetPosition(*MI, BasePos, OffsetPos)) {
+            MI->getOperand(BasePos).setReg(NewBaseReg);
+            int64_t Offset = MI->getOperand(OffsetPos).getImm();
+            MI->getOperand(OffsetPos).setImm(Offset - It->second.second);
+          }
+        }
+        // The pending overlap is consumed by the first use that is found,
+        // whether or not that instruction could be rewritten.
+        OverlapReg = 0;
+        NewBaseReg = 0;
+        break;
+      }
+      // Look for an instruction of the form p' = op(p), which uses and defines
+      // two virtual registers that get allocated to the same physical register.
+      unsigned TiedUseIdx = 0;
+      if (MI->isRegTiedToUseOperand(i, &TiedUseIdx)) {
+        // OverlapReg is p in the example above.
+        OverlapReg = MI->getOperand(TiedUseIdx).getReg();
+        // NewBaseReg is p' in the example above.
+        NewBaseReg = MI->getOperand(i).getReg();
+        break;
+      }
+    }
+  }
+}
+
/// After the schedule has been formed, call this function to combine
/// the instructions from the different stages/cycles. That is, this
/// function creates a schedule that represents a single iteration.
// map. We need to use the new registers to create the correct order.
for (int i = 0, e = SSD->SUnits.size(); i != e; ++i) {
SUnit *SU = &SSD->SUnits[i];
- SSD->applyInstrChange(SU->getInstr(), *this, true);
+ SSD->applyInstrChange(SU->getInstr(), *this);
}
// Reorder the instructions in each cycle to fix and improve the
// Replace the old order with the new order.
cycleInstrs.swap(newOrderZC);
cycleInstrs.insert(cycleInstrs.end(), newOrderI.begin(), newOrderI.end());
+ SSD->fixupRegisterOverlaps(cycleInstrs);
}
DEBUG(dump(););
bool HexagonInstrInfo::getIncrementValue(const MachineInstr &MI,
int &Value) const {
if (isPostIncrement(MI)) {
+ // For a post-increment, the offset is zero and the increment value is
+ // determined by the instruction's access size.
+ int Zero;
unsigned AccessSize;
- return getBaseAndOffset(MI, Value, AccessSize);
+ bool RetVal = getBaseAndOffset(MI, Zero, AccessSize);
+ Value = (int) AccessSize;
+ return RetVal;
}
if (MI.getOpcode() == Hexagon::A2_addi) {
Value = MI.getOperand(2).getImm();
Off.setImm(Off.getImm() + FrameSize + HEXAGON_LRFP_SIZE);
}
+/// Try to rewrite the immediate offset of the instruction in \p SUI so that it
+/// can be packetized together with the instruction in \p SUJ.
+///
+/// On success the offset of SUI's instruction is increased by the increment
+/// value reported for SUJ's instruction, the previous offset is remembered in
+/// ChangedOffset, and true is returned. Otherwise nothing is modified and
+/// false is returned.
+bool HexagonPacketizerList::updateOffset(SUnit *SUI, SUnit *SUJ) {
+  assert(SUI->getInstr() && SUJ->getInstr());
+  MachineInstr &InstI = *SUI->getInstr();
+  MachineInstr &InstJ = *SUJ->getInstr();
+
+  // Both instructions must expose a base register and an offset operand.
+  unsigned BaseIdxI, OffsetIdxI;
+  if (!HII->getBaseAndOffsetPosition(InstI, BaseIdxI, OffsetIdxI))
+    return false;
+  unsigned BaseIdxJ, OffsetIdxJ;
+  if (!HII->getBaseAndOffsetPosition(InstJ, BaseIdxJ, OffsetIdxJ))
+    return false;
+
+  // The two instructions have to use the same base register.
+  const unsigned BaseReg = InstI.getOperand(BaseIdxI).getReg();
+  if (InstJ.getOperand(BaseIdxJ).getReg() != BaseReg)
+    return false;
+
+  // Make sure that the dependences do not restrict adding the instruction to
+  // the packet: anti dependences are ignored, and every other dependence has
+  // to be a data dependence on the shared base register.
+  for (const auto &Pred : SUI->Preds) {
+    const auto DepKind = Pred.getKind();
+    if (DepKind == SDep::Anti)
+      continue;
+    if (DepKind == SDep::Data && Pred.getReg() == BaseReg)
+      continue;
+    return false;
+  }
+
+  int Increment;
+  if (!HII->getIncrementValue(InstJ, Increment))
+    return false;
+
+  // Apply the increment and keep the old value so that it can be restored.
+  auto &OffsetOp = InstI.getOperand(OffsetIdxI);
+  const int64_t OldOffset = OffsetOp.getImm();
+  OffsetOp.setImm(OldOffset + Increment);
+  ChangedOffset = OldOffset;
+  return true;
+}
+
+/// Restore the offset operand of \p MI to the value saved in ChangedOffset.
+/// This is needed if the instruction cannot be added to the current packet
+/// due to a different instruction.
+void HexagonPacketizerList::undoChangedOffset(MachineInstr &MI) {
+  unsigned BasePos, OffsetPos;
+  const bool FoundOperands =
+      HII->getBaseAndOffsetPosition(MI, BasePos, OffsetPos);
+  if (!FoundOperands)
+    llvm_unreachable("Unable to find base and offset operands.");
+  // Write back the offset that was remembered before it was changed.
+  MI.getOperand(OffsetPos).setImm(ChangedOffset);
+}
+
enum PredicateKind {
PK_False,
PK_True,
GlueToNewValueJump = false;
GlueAllocframeStore = false;
FoundSequentialDependence = false;
+ ChangedOffset = INT64_MAX;
}
// Ignore bundling of pseudo instructions.
useCalleesSP(I);
GlueAllocframeStore = false;
}
+
+ if (ChangedOffset != INT64_MAX)
+ undoChangedOffset(I);
+ else if (updateOffset(SUI, SUJ)) {
+ FoundSequentialDependence = false;
+ Dependence = false;
+ return true;
+ }
+
return false;
}
// Has the feeder instruction been glued to new value jump.
bool GlueToNewValueJump;
+ // This holds the offset value, when pruning the dependences.
+ int64_t ChangedOffset;
+
// Check if there is a dependence between some instruction already in this
// packet and this instruction.
bool Dependence;
bool demoteToDotOld(MachineInstr &MI);
bool useCallersSP(MachineInstr &MI);
void useCalleesSP(MachineInstr &MI);
+ bool updateOffset(SUnit *SUI, SUnit *SUJ);
+ void undoChangedOffset(MachineInstr &MI);
bool arePredicatesComplements(MachineInstr &MI1, MachineInstr &MI2);
bool restrictingDepExistInPacket(MachineInstr&, unsigned);
bool isNewifiable(const MachineInstr &MI, const TargetRegisterClass *NewRC);
--- /dev/null
+; RUN: llc -march=hexagon < %s | FileCheck %s
+
+; Test that the instruction ordering code in the pipeliner fixes up dependences
+; between post-increment register definitions and uses so that the register
+; allocator does not allocate an additional register. The following test case
+; should generate a single packet.
+
+; CHECK: loop0(.LBB0_[[LOOP:.]],
+; CHECK: .LBB0_[[LOOP]]:
+; CHECK: {
+; CHECK-NOT: {
+; CHECK: :endloop0
+
+; @test walks %len consecutive i64 elements starting at %v1. Each element is
+; loaded, passed together with %v2 to the llvm.hexagon.M2.mmpyul.rs1
+; intrinsic, and the result is stored back to the same address.
+define void @test(i64* nocapture %v1, i64 %v2, i32 %len) local_unnamed_addr #0 {
+entry:
+; Skip the loop entirely unless %len is greater than zero.
+ %cmp7 = icmp sgt i32 %len, 0
+ br i1 %cmp7, label %for.body, label %for.end
+
+for.body:
+; %arrayidx.phi is the current element pointer, %i.08 the iteration counter.
+ %arrayidx.phi = phi i64* [ %arrayidx.inc, %for.body ], [ %v1, %entry ]
+ %i.08 = phi i32 [ %inc, %for.body ], [ 0, %entry ]
+; The load and the store below use the same address.
+ %0 = load i64, i64* %arrayidx.phi, align 8
+ %1 = tail call i64 @llvm.hexagon.M2.mmpyul.rs1(i64 %0, i64 %v2)
+ store i64 %1, i64* %arrayidx.phi, align 8
+; Advance the counter, test for loop exit, and step the pointer by one i64.
+ %inc = add nuw nsw i32 %i.08, 1
+ %exitcond = icmp eq i32 %inc, %len
+ %arrayidx.inc = getelementptr i64, i64* %arrayidx.phi, i32 1
+ br i1 %exitcond, label %for.end, label %for.body
+
+for.end:
+ ret void
+}
+
+declare i64 @llvm.hexagon.M2.mmpyul.rs1(i64, i64) #1
+
+attributes #0 = { nounwind "target-cpu"="hexagonv60" }
+attributes #1 = { nounwind readnone }