void SystemZHazardRecognizer::Reset() {
CurrGroupSize = 0;
+ CurrGroupHas4RegOps = false;
clearProcResCounters();
GrpCount = 0;
LastFPdOpCycleIdx = UINT_MAX;
if (SC->BeginGroup)
return (CurrGroupSize == 0);
+ // An instruction with 4 register operands will not fit in last slot.
+ // Sanity check: a group that already holds a 4-register-operand
+ // instruction must not have reached two used slots here. The message is
+ // combined with '&&' so that the condition alone decides whether the
+ // assert fires (a string literal is always non-null).
+ assert ((CurrGroupSize < 2 || !CurrGroupHas4RegOps) &&
+         "Current decoder group is already full!");
+ // With two slots already used, only the last slot remains, which cannot
+ // take an instruction with 4 register operands.
+ if (CurrGroupSize == 2 && has4RegOps(SU->getInstr()))
+ return false;
+
// Since a full group is handled immediately in EmitInstruction(),
// SU should fit into current group. NumSlots should be 1 or 0,
// since it is not a cracked or expanded instruction.
return true;
}
+/// Return true if \p MI has at least four counted register operands.
+/// An operand is counted when its descriptor slot has a register class;
+/// operands at index >= getNumDefs() that carry a TIED_TO constraint are
+/// not counted.
+bool SystemZHazardRecognizer::has4RegOps(const MachineInstr *MI) const {
+  const MachineFunction &MF = *MI->getParent()->getParent();
+  const TargetRegisterInfo *TRI = &TII->getRegisterInfo();
+  const MCInstrDesc &MID = MI->getDesc();
+  const unsigned NumOps = MID.getNumOperands();
+  unsigned NumRegOps = 0;
+  for (unsigned Idx = 0; Idx != NumOps; ++Idx) {
+    // Operands without a register class do not count.
+    if (!TII->getRegClass(MID, Idx, TRI, MF))
+      continue;
+    // Skip a non-def operand that is tied to another operand.
+    const bool IsTiedUse =
+        Idx >= MID.getNumDefs() &&
+        MID.getOperandConstraint(Idx, MCOI::TIED_TO) != -1;
+    if (!IsTiedUse)
+      ++NumRegOps;
+  }
+  return NumRegOps >= 4;
+}
+
void SystemZHazardRecognizer::nextGroup() {
if (CurrGroupSize == 0)
return;
// Reset counter for next group.
CurrGroupSize = 0;
+ CurrGroupHas4RegOps = false;
// Decrease counters for execution units by one.
for (unsigned i = 0; i < SchedModel->getNumProcResourceKinds(); ++i)
OS << "/EndsGroup";
if (SU->isUnbuffered)
OS << "/Unbuffered";
+ if (has4RegOps(SU->getInstr()))
+ OS << "/4RegOps";
}
void SystemZHazardRecognizer::dumpCurrGroup(std::string Msg) const {
dbgs() << "{ " << CurGroupDbg << " }";
dbgs() << " (" << CurrGroupSize << " decoder slot"
<< (CurrGroupSize > 1 ? "s":"")
+ << (CurrGroupHas4RegOps ? ", 4RegOps" : "")
<< ")\n";
}
}
// Insert SU into current group by increasing number of slots used
// in current group.
CurrGroupSize += getNumDecoderSlots(SU);
- assert (CurrGroupSize <= 3);
+ CurrGroupHas4RegOps |= has4RegOps(SU->getInstr());
+ unsigned GroupLim =
+ ((CurrGroupHas4RegOps && getNumDecoderSlots(SU) < 3) ? 2 : 3);
+ assert (CurrGroupSize <= GroupLim && "SU does not fit into decoder group!");
// Check if current group is now full/ended. If so, move on to next
// group to be ready to evaluate more candidates.
- if (CurrGroupSize == 3 || SC->EndGroup)
+ if (CurrGroupSize == GroupLim || SC->EndGroup)
nextGroup();
}
return -1;
}
+ // An instruction with 4 register operands will not fit in last slot.
+ if (CurrGroupSize == 2 && has4RegOps(SU->getInstr()))
+ return 1;
+
// Most instructions can be placed in any decoder slot.
return 0;
}
/// SystemZHazardRecognizer maintains the state for one MBB during scheduling.
class SystemZHazardRecognizer : public ScheduleHazardRecognizer {
-#ifndef NDEBUG
const SystemZInstrInfo *TII;
-#endif
const TargetSchedModel *SchedModel;
/// Keep track of the number of decoder slots used in the current
/// decoder group.
unsigned CurrGroupSize;
+ /// True if an instruction with four register operands has been scheduled
+ /// into the current decoder group.
+ bool CurrGroupHas4RegOps;
+
/// The tracking of resources here are quite similar to the common
/// code use of a critical resource. However, z13 differs in the way
/// that it has two processor sides which may be interesting to
/// Return true if MI fits into current decoder group.
bool fitsIntoCurrentGroup(SUnit *SU) const;
+ /// Return true if this instruction has four register operands.
+ bool has4RegOps(const MachineInstr *MI) const;
+
/// Two decoder groups per cycle are formed (for z13), meaning 2x3
/// instructions. This function returns a number between 0 and 5,
/// representing the current decoder slot of the current cycle. If an SU
public:
SystemZHazardRecognizer(const SystemZInstrInfo *tii,
const TargetSchedModel *SM)
- :
-#ifndef NDEBUG
- TII(tii),
-#endif
- SchedModel(SM) {
+ : TII(tii), SchedModel(SM) {
Reset();
}
; CHECK-NEXT: vpkg %v6, %v6, %v7
; CHECK-NEXT: vpkg %v4, %v4, %v5
; CHECK-NEXT: vn %v5, %v16, %v6
-; CHECK-NEXT: vsel %v24, %v3, %v2, %v5
-; CHECK-NEXT: vldeb %v17, %v17
+; CHECK-DAG: vsel %v24, %v3, %v2, %v5
+; CHECK-DAG: vldeb %v17, %v17
; CHECK-NEXT: vldeb %v18, %v18
; CHECK-NEXT: vfchdb %v17, %v18, %v17
; CHECK-NEXT: vmrhf %v18, %v30, %v30