#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/MachineOutliner.h"
#include "llvm/CodeGen/PseudoSourceValue.h"
+#include "llvm/CodeGen/VirtRegMap.h"
#include "llvm/MC/MCInstrInfo.h"
#include "llvm/Support/BranchProbability.h"
#include "llvm/Support/ErrorHandling.h"
/// operand folded, otherwise NULL is returned.
/// The new instruction is inserted before MI, and the client is responsible
/// for removing the old instruction.
+ /// If VRM is passed, the assigned physregs can be inspected by target to
+ /// decide on using an opcode (note that those assignments can still change).
MachineInstr *foldMemoryOperand(MachineInstr &MI, ArrayRef<unsigned> Ops,
int FI,
- LiveIntervals *LIS = nullptr) const;
+ LiveIntervals *LIS = nullptr,
+ VirtRegMap *VRM = nullptr) const;
/// Same as the previous version except it allows folding of any load and
/// store from / to any address, not just from a specific stack slot.
foldMemoryOperandImpl(MachineFunction &MF, MachineInstr &MI,
ArrayRef<unsigned> Ops,
MachineBasicBlock::iterator InsertPt, int FrameIndex,
- LiveIntervals *LIS = nullptr) const {
+ LiveIntervals *LIS = nullptr,
+ VirtRegMap *VRM = nullptr) const {
return nullptr;
}
return false;
}
+ /// Add passes to be run immediately after virtual registers are rewritten
+ /// to physical registers.
+ virtual void addPostRewrite() { }
+
/// This method may be implemented by targets that want to run passes after
/// register allocation pass pipeline but before prolog-epilog insertion.
virtual void addPostRegAlloc() { }
MachineInstr *FoldMI =
LoadMI ? TII.foldMemoryOperand(*MI, FoldOps, *LoadMI, &LIS)
- : TII.foldMemoryOperand(*MI, FoldOps, StackSlot, &LIS);
+ : TII.foldMemoryOperand(*MI, FoldOps, StackSlot, &LIS, &VRM);
if (!FoldMI)
return false;
MachineInstr *TargetInstrInfo::foldMemoryOperand(MachineInstr &MI,
ArrayRef<unsigned> Ops, int FI,
- LiveIntervals *LIS) const {
+ LiveIntervals *LIS,
+ VirtRegMap *VRM) const {
auto Flags = MachineMemOperand::MONone;
for (unsigned OpIdx : Ops)
Flags |= MI.getOperand(OpIdx).isDef() ? MachineMemOperand::MOStore
MBB->insert(MI, NewMI);
} else {
// Ask the target to do the actual folding.
- NewMI = foldMemoryOperandImpl(MF, MI, Ops, MI, FI, LIS);
+ NewMI = foldMemoryOperandImpl(MF, MI, Ops, MI, FI, LIS, VRM);
}
if (NewMI) {
addPass(&MachineSchedulerID);
if (addRegAssignmentOptimized()) {
+ // Allow targets to expand pseudo instructions depending on the choice of
+ // registers before MachineCopyPropagation.
+ addPostRewrite();
+
// Copy propagate to forward register uses and try to eliminate COPYs that
// were not coalesced.
addPass(&MachineCopyPropagationID);
MachineInstr *AArch64InstrInfo::foldMemoryOperandImpl(
MachineFunction &MF, MachineInstr &MI, ArrayRef<unsigned> Ops,
MachineBasicBlock::iterator InsertPt, int FrameIndex,
- LiveIntervals *LIS) const {
+ LiveIntervals *LIS, VirtRegMap *VRM) const {
// This is a bit of a hack. Consider this instruction:
//
// %0 = COPY %sp; GPR64all:%0
foldMemoryOperandImpl(MachineFunction &MF, MachineInstr &MI,
ArrayRef<unsigned> Ops,
MachineBasicBlock::iterator InsertPt, int FrameIndex,
- LiveIntervals *LIS = nullptr) const override;
+ LiveIntervals *LIS = nullptr,
+ VirtRegMap *VRM = nullptr) const override;
/// \returns true if a branch from an instruction with opcode \p BranchOpc
/// bytes is capable of jumping to a position \p BrOffset bytes away.
SystemZMCInstLower.cpp
SystemZRegisterInfo.cpp
SystemZSelectionDAGInfo.cpp
+ SystemZPostRewrite.cpp
SystemZShortenInst.cpp
SystemZSubtarget.cpp
SystemZTargetMachine.cpp
FunctionPass *createSystemZShortenInstPass(SystemZTargetMachine &TM);
FunctionPass *createSystemZLongBranchPass(SystemZTargetMachine &TM);
FunctionPass *createSystemZLDCleanupPass(SystemZTargetMachine &TM);
+FunctionPass *createSystemZPostRewritePass(SystemZTargetMachine &TM);
FunctionPass *createSystemZTDCPass();
} // end namespace llvm
string OpKey = "";
string OpType = "none";
+ // MemKey identifies a targe reg-mem opcode, while MemType can be either
+ // "pseudo" or "target". This is used to map a pseduo memory instruction to
+ // its corresponding target opcode. See comment at MemFoldPseudo.
+ string MemKey = "";
+ string MemType = "none";
+
// Many distinct-operands instructions have older 2-operand equivalents.
// NumOpsKey uniquely identifies one of these 2-operand and 3-operand pairs,
// with NumOpsValue being "2" or "3" as appropriate.
let ValueCols = [["20"]];
}
-// Return the memory form of a register instruction.
+// Return the memory form of a register instruction. Note that this may
+// return a MemFoldPseudo instruction (see below).
def getMemOpcode : InstrMapping {
let FilterClass = "InstSystemZ";
let RowFields = ["OpKey"];
let ValueCols = [["mem"]];
}
-// Return the 3-operand form of a 2-operand instruction.
-def getThreeOperandOpcode : InstrMapping {
+// Return the target memory instruction for a MemFoldPseudo.
+def getTargetMemOpcode : InstrMapping {
+ let FilterClass = "InstSystemZ";
+ let RowFields = ["MemKey"];
+ let ColFields = ["MemType"];
+ let KeyCol = ["pseudo"];
+ let ValueCols = [["target"]];
+}
+
+// Return the 2-operand form of a 3-operand instruction.
+def getTwoOperandOpcode : InstrMapping {
let FilterClass = "InstSystemZ";
let RowFields = ["NumOpsKey"];
let ColFields = ["NumOpsValue"];
- let KeyCol = ["2"];
- let ValueCols = [["3"]];
+ let KeyCol = ["3"];
+ let ValueCols = [["2"]];
}
//===----------------------------------------------------------------------===//
mnemonic#"\t$R1, $R2, $R3",
[(set cls1:$R1, (operator cls2:$R2, cls3:$R3))]> {
let M4 = 0;
+ let OpKey = mnemonic#cls1;
+ let OpType = "reg";
}
multiclass BinaryRRAndK<string mnemonic, bits<8> opcode1, bits<16> opcode2,
RegisterOperand cls2> {
let NumOpsKey = mnemonic in {
let NumOpsValue = "3" in
- def K : BinaryRRFa<mnemonic#"k", opcode2, null_frag, cls1, cls1, cls2>,
+ def K : BinaryRRFa<mnemonic#"k", opcode2, operator, cls1, cls1, cls2>,
Requires<[FeatureDistinctOps]>;
- let NumOpsValue = "2", isConvertibleToThreeAddress = 1 in
+ let NumOpsValue = "2" in
def "" : BinaryRR<mnemonic, opcode1, operator, cls1, cls2>;
}
}
RegisterOperand cls2> {
let NumOpsKey = mnemonic in {
let NumOpsValue = "3" in
- def K : BinaryRRFa<mnemonic#"k", opcode2, null_frag, cls1, cls1, cls2>,
+ def K : BinaryRRFa<mnemonic#"k", opcode2, operator, cls1, cls1, cls2>,
Requires<[FeatureDistinctOps]>;
- let NumOpsValue = "2", isConvertibleToThreeAddress = 1 in
+ let NumOpsValue = "2" in
def "" : BinaryRRE<mnemonic, opcode1, operator, cls1, cls2>;
}
}
Immediate imm> {
let NumOpsKey = mnemonic in {
let NumOpsValue = "3" in
- def K : BinaryRIE<mnemonic##"k", opcode2, null_frag, cls, imm>,
+ def K : BinaryRIE<mnemonic##"k", opcode2, operator, cls, imm>,
Requires<[FeatureDistinctOps]>;
- let NumOpsValue = "2", isConvertibleToThreeAddress = 1 in
+ let NumOpsValue = "2" in
def "" : BinaryRI<mnemonic, opcode1, operator, cls, imm>;
}
}
SDPatternOperator operator, RegisterOperand cls> {
let NumOpsKey = mnemonic in {
let NumOpsValue = "3" in
- def K : BinaryRSY<mnemonic##"k", opcode2, null_frag, cls>,
+ def K : BinaryRSY<mnemonic##"k", opcode2, operator, cls>,
Requires<[FeatureDistinctOps]>;
- let NumOpsValue = "2", isConvertibleToThreeAddress = 1 in
+ let NumOpsValue = "2" in
def "" : BinaryRS<mnemonic, opcode1, operator, cls>;
}
}
RegisterOperand cls, Immediate imm> {
let NumOpsKey = key in {
let NumOpsValue = "3" in
- def K : BinaryRIEPseudo<null_frag, cls, imm>,
+ def K : BinaryRIEPseudo<operator, cls, imm>,
Requires<[FeatureHighWord, FeatureDistinctOps]>;
- let NumOpsValue = "2", isConvertibleToThreeAddress = 1 in
+ let NumOpsValue = "2" in
def "" : BinaryRIPseudo<operator, cls, imm>,
Requires<[FeatureHighWord]>;
}
}
+// A pseudo that is used during register allocation when folding a memory
+// operand. The 3-address register instruction with a spilled source cannot
+// be converted directly to a target 2-address reg/mem instruction.
+// Mapping: <INSN>R -> MemFoldPseudo -> <INSN>
+class MemFoldPseudo<string mnemonic, RegisterOperand cls, bits<5> bytes,
+ AddressingMode mode>
+ : Pseudo<(outs cls:$R1), (ins cls:$R2, mode:$XBD2), []> {
+ let OpKey = mnemonic#"rk"#cls;
+ let OpType = "mem";
+ let MemKey = mnemonic#cls;
+ let MemType = "pseudo";
+ let mayLoad = 1;
+ let AccessBytes = bytes;
+ let HasIndex = 1;
+ let hasNoSchedulingInfo = 1;
+}
+
// Like CompareRI, but expanded after RA depending on the choice of register.
class CompareRIPseudo<SDPatternOperator operator, RegisterOperand cls,
Immediate imm>
class AtomicLoadWBinaryImm<SDPatternOperator operator, Immediate imm>
: AtomicLoadWBinary<operator, (i32 imm:$src2), imm>;
-// Define an instruction that operates on two fixed-length blocks of memory,
-// and associated pseudo instructions for operating on blocks of any size.
-// The Sequence form uses a straight-line sequence of instructions and
-// the Loop form uses a loop of length-256 instructions followed by
-// another instruction to handle the excess.
-multiclass MemorySS<string mnemonic, bits<8> opcode,
- SDPatternOperator sequence, SDPatternOperator loop> {
- def "" : SideEffectBinarySSa<mnemonic, opcode>;
- let usesCustomInserter = 1, hasNoSchedulingInfo = 1, Defs = [CC] in {
- def Sequence : Pseudo<(outs), (ins bdaddr12only:$dest, bdaddr12only:$src,
- imm64:$length),
- [(sequence bdaddr12only:$dest, bdaddr12only:$src,
- imm64:$length)]>;
- def Loop : Pseudo<(outs), (ins bdaddr12only:$dest, bdaddr12only:$src,
- imm64:$length, GR64:$count256),
- [(loop bdaddr12only:$dest, bdaddr12only:$src,
- imm64:$length, GR64:$count256)]>;
- }
-}
-
-// The same, but setting a CC result as comparion operator.
-multiclass CompareMemorySS<string mnemonic, bits<8> opcode,
- SDPatternOperator sequence, SDPatternOperator loop> {
- def "" : SideEffectBinarySSa<mnemonic, opcode>;
- let usesCustomInserter = 1, hasNoSchedulingInfo = 1 in {
- def Sequence : Pseudo<(outs), (ins bdaddr12only:$dest, bdaddr12only:$src,
- imm64:$length),
- [(set CC, (sequence bdaddr12only:$dest, bdaddr12only:$src,
- imm64:$length))]>;
- def Loop : Pseudo<(outs), (ins bdaddr12only:$dest, bdaddr12only:$src,
- imm64:$length, GR64:$count256),
- [(set CC, (loop bdaddr12only:$dest, bdaddr12only:$src,
- imm64:$length, GR64:$count256))]>;
- }
-}
-
-// Define an instruction that operates on two strings, both terminated
-// by the character in R0. The instruction processes a CPU-determinated
-// number of bytes at a time and sets CC to 3 if the instruction needs
-// to be repeated. Also define a pseudo instruction that represents
-// the full loop (the main instruction plus the branch on CC==3).
-multiclass StringRRE<string mnemonic, bits<16> opcode,
- SDPatternOperator operator> {
- let Uses = [R0L] in
- def "" : SideEffectBinaryMemMemRRE<mnemonic, opcode, GR64, GR64>;
- let usesCustomInserter = 1, hasNoSchedulingInfo = 1 in
- def Loop : Pseudo<(outs GR64:$end),
- (ins GR64:$start1, GR64:$start2, GR32:$char),
- [(set GR64:$end, (operator GR64:$start1, GR64:$start2,
- GR32:$char))]>;
-}
-
// A pseudo instruction that is a direct alias of a real instruction.
// These aliases are used in cases where a particular register operand is
// fixed or where the same instruction is used with different register sizes.
imm32zx6:$I5), []> {
let Constraints = "$R1 = $R1src";
}
+
+//===----------------------------------------------------------------------===//
+// Multiclasses that emit both real and pseudo instructions
+//===----------------------------------------------------------------------===//
+
+multiclass BinaryRXYAndPseudo<string mnemonic, bits<16> opcode,
+ SDPatternOperator operator, RegisterOperand cls,
+ SDPatternOperator load, bits<5> bytes,
+ AddressingMode mode = bdxaddr20only> {
+
+ def "" : BinaryRXY<mnemonic, opcode, operator, cls, load, bytes, mode> {
+ let MemKey = mnemonic#cls;
+ let MemType = "target";
+ }
+ let Has20BitOffset = 1 in
+ def _MemFoldPseudo : MemFoldPseudo<mnemonic, cls, bytes, mode>;
+}
+
+multiclass BinaryRXPairAndPseudo<string mnemonic, bits<8> rxOpcode,
+ bits<16> rxyOpcode, SDPatternOperator operator,
+ RegisterOperand cls,
+ SDPatternOperator load, bits<5> bytes> {
+ let DispKey = mnemonic ## #cls in {
+ def "" : BinaryRX<mnemonic, rxOpcode, operator, cls, load, bytes,
+ bdxaddr12pair> {
+ let DispSize = "12";
+ let MemKey = mnemonic#cls;
+ let MemType = "target";
+ }
+ let DispSize = "20" in
+ def Y : BinaryRXY<mnemonic#"y", rxyOpcode, operator, cls, load,
+ bytes, bdxaddr20pair>;
+ }
+ def _MemFoldPseudo : MemFoldPseudo<mnemonic, cls, bytes, bdxaddr12pair>;
+}
+
+// Define an instruction that operates on two fixed-length blocks of memory,
+// and associated pseudo instructions for operating on blocks of any size.
+// The Sequence form uses a straight-line sequence of instructions and
+// the Loop form uses a loop of length-256 instructions followed by
+// another instruction to handle the excess.
+multiclass MemorySS<string mnemonic, bits<8> opcode,
+ SDPatternOperator sequence, SDPatternOperator loop> {
+ def "" : SideEffectBinarySSa<mnemonic, opcode>;
+ let usesCustomInserter = 1, hasNoSchedulingInfo = 1, Defs = [CC] in {
+ def Sequence : Pseudo<(outs), (ins bdaddr12only:$dest, bdaddr12only:$src,
+ imm64:$length),
+ [(sequence bdaddr12only:$dest, bdaddr12only:$src,
+ imm64:$length)]>;
+ def Loop : Pseudo<(outs), (ins bdaddr12only:$dest, bdaddr12only:$src,
+ imm64:$length, GR64:$count256),
+ [(loop bdaddr12only:$dest, bdaddr12only:$src,
+ imm64:$length, GR64:$count256)]>;
+ }
+}
+
+// The same, but setting a CC result as comparion operator.
+multiclass CompareMemorySS<string mnemonic, bits<8> opcode,
+ SDPatternOperator sequence, SDPatternOperator loop> {
+ def "" : SideEffectBinarySSa<mnemonic, opcode>;
+ let usesCustomInserter = 1, hasNoSchedulingInfo = 1 in {
+ def Sequence : Pseudo<(outs), (ins bdaddr12only:$dest, bdaddr12only:$src,
+ imm64:$length),
+ [(set CC, (sequence bdaddr12only:$dest, bdaddr12only:$src,
+ imm64:$length))]>;
+ def Loop : Pseudo<(outs), (ins bdaddr12only:$dest, bdaddr12only:$src,
+ imm64:$length, GR64:$count256),
+ [(set CC, (loop bdaddr12only:$dest, bdaddr12only:$src,
+ imm64:$length, GR64:$count256))]>;
+ }
+}
+
+// Define an instruction that operates on two strings, both terminated
+// by the character in R0. The instruction processes a CPU-determinated
+// number of bytes at a time and sets CC to 3 if the instruction needs
+// to be repeated. Also define a pseudo instruction that represents
+// the full loop (the main instruction plus the branch on CC==3).
+multiclass StringRRE<string mnemonic, bits<16> opcode,
+ SDPatternOperator operator> {
+ let Uses = [R0L] in
+ def "" : SideEffectBinaryMemMemRRE<mnemonic, opcode, GR64, GR64>;
+ let usesCustomInserter = 1, hasNoSchedulingInfo = 1 in
+ def Loop : Pseudo<(outs GR64:$end),
+ (ins GR64:$start1, GR64:$start2, GR32:$char),
+ [(set GR64:$end, (operator GR64:$start1, GR64:$start2,
+ GR32:$char))]>;
+}
}
}
-// Used to return from convertToThreeAddress after replacing two-address
-// instruction OldMI with three-address instruction NewMI.
-static MachineInstr *finishConvertToThreeAddress(MachineInstr *OldMI,
- MachineInstr *NewMI,
- LiveVariables *LV) {
- if (LV) {
- unsigned NumOps = OldMI->getNumOperands();
- for (unsigned I = 1; I < NumOps; ++I) {
- MachineOperand &Op = OldMI->getOperand(I);
- if (Op.isReg() && Op.isKill())
- LV->replaceKillInstruction(Op.getReg(), *OldMI, *NewMI);
- }
- }
- transferDeadCC(OldMI, NewMI);
- return NewMI;
-}
-
MachineInstr *SystemZInstrInfo::convertToThreeAddress(
MachineFunction::iterator &MFI, MachineInstr &MI, LiveVariables *LV) const {
MachineBasicBlock *MBB = MI.getParent();
- MachineFunction *MF = MBB->getParent();
- MachineRegisterInfo &MRI = MF->getRegInfo();
-
- unsigned Opcode = MI.getOpcode();
- unsigned NumOps = MI.getNumOperands();
-
- // Try to convert something like SLL into SLLK, if supported.
- // We prefer to keep the two-operand form where possible both
- // because it tends to be shorter and because some instructions
- // have memory forms that can be used during spilling.
- if (STI.hasDistinctOps()) {
- MachineOperand &Dest = MI.getOperand(0);
- MachineOperand &Src = MI.getOperand(1);
- unsigned DestReg = Dest.getReg();
- unsigned SrcReg = Src.getReg();
- // AHIMux is only really a three-operand instruction when both operands
- // are low registers. Try to constrain both operands to be low if
- // possible.
- if (Opcode == SystemZ::AHIMux &&
- TargetRegisterInfo::isVirtualRegister(DestReg) &&
- TargetRegisterInfo::isVirtualRegister(SrcReg) &&
- MRI.getRegClass(DestReg)->contains(SystemZ::R1L) &&
- MRI.getRegClass(SrcReg)->contains(SystemZ::R1L)) {
- MRI.constrainRegClass(DestReg, &SystemZ::GR32BitRegClass);
- MRI.constrainRegClass(SrcReg, &SystemZ::GR32BitRegClass);
- }
- int ThreeOperandOpcode = SystemZ::getThreeOperandOpcode(Opcode);
- if (ThreeOperandOpcode >= 0) {
- // Create three address instruction without adding the implicit
- // operands. Those will instead be copied over from the original
- // instruction by the loop below.
- MachineInstrBuilder MIB(
- *MF, MF->CreateMachineInstr(get(ThreeOperandOpcode), MI.getDebugLoc(),
- /*NoImplicit=*/true));
- MIB.add(Dest);
- // Keep the kill state, but drop the tied flag.
- MIB.addReg(Src.getReg(), getKillRegState(Src.isKill()), Src.getSubReg());
- // Keep the remaining operands as-is.
- for (unsigned I = 2; I < NumOps; ++I)
- MIB.add(MI.getOperand(I));
- MBB->insert(MI, MIB);
- return finishConvertToThreeAddress(&MI, MIB, LV);
- }
- }
// Try to convert an AND into an RISBG-type instruction.
- if (LogicOp And = interpretAndImmediate(Opcode)) {
+ // TODO: It might be beneficial to select RISBG and shorten to AND instead.
+ if (LogicOp And = interpretAndImmediate(MI.getOpcode())) {
uint64_t Imm = MI.getOperand(2).getImm() << And.ImmLSB;
// AND IMMEDIATE leaves the other bits of the register unchanged.
Imm |= allOnes(And.RegSize) & ~(allOnes(And.ImmSize) << And.ImmLSB);
.addImm(Start)
.addImm(End + 128)
.addImm(0);
- return finishConvertToThreeAddress(&MI, MIB, LV);
+ if (LV) {
+ unsigned NumOps = MI.getNumOperands();
+ for (unsigned I = 1; I < NumOps; ++I) {
+ MachineOperand &Op = MI.getOperand(I);
+ if (Op.isReg() && Op.isKill())
+ LV->replaceKillInstruction(Op.getReg(), MI, *MIB);
+ }
+ }
+ transferDeadCC(&MI, MIB);
+ return MIB;
}
}
return nullptr;
MachineInstr *SystemZInstrInfo::foldMemoryOperandImpl(
MachineFunction &MF, MachineInstr &MI, ArrayRef<unsigned> Ops,
MachineBasicBlock::iterator InsertPt, int FrameIndex,
- LiveIntervals *LIS) const {
+ LiveIntervals *LIS, VirtRegMap *VRM) const {
const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
const MachineFrameInfo &MFI = MF.getFrameInfo();
unsigned Size = MFI.getObjectSize(FrameIndex);
}
}
- // If the spilled operand is the final one, try to change <INSN>R
- // into <INSN>.
+ // If the spilled operand is the final one or the instruction is
+ // commutable, try to change <INSN>R into <INSN>.
+ unsigned NumOps = MI.getNumExplicitOperands();
int MemOpcode = SystemZ::getMemOpcode(Opcode);
+
+ // See if this is a 3-address instruction that is convertible to 2-address
+ // and suitable for folding below. Only try this with virtual registers
+ // and a provided VRM (during regalloc).
+ bool NeedsCommute = false;
+ if (SystemZ::getTwoOperandOpcode(Opcode) != -1 && MemOpcode != -1) {
+ if (VRM == nullptr)
+ MemOpcode = -1;
+ else {
+ assert(NumOps == 3 && "Expected two source registers.");
+ unsigned DstReg = MI.getOperand(0).getReg();
+ unsigned DstPhys =
+ (TRI->isVirtualRegister(DstReg) ? VRM->getPhys(DstReg) : DstReg);
+ unsigned SrcReg = (OpNum == 2 ? MI.getOperand(1).getReg()
+ : ((OpNum == 1 && MI.isCommutable())
+ ? MI.getOperand(2).getReg()
+ : 0));
+ if (DstPhys && !SystemZ::GRH32BitRegClass.contains(DstPhys) && SrcReg &&
+ TRI->isVirtualRegister(SrcReg) && DstPhys == VRM->getPhys(SrcReg))
+ NeedsCommute = (OpNum == 1);
+ else
+ MemOpcode = -1;
+ }
+ }
+
if (MemOpcode >= 0) {
- unsigned NumOps = MI.getNumExplicitOperands();
- if (OpNum == NumOps - 1) {
+ if ((OpNum == NumOps - 1) || NeedsCommute) {
const MCInstrDesc &MemDesc = get(MemOpcode);
uint64_t AccessBytes = SystemZII::getAccessSize(MemDesc.TSFlags);
assert(AccessBytes != 0 && "Size of access should be known");
uint64_t Offset = Size - AccessBytes;
MachineInstrBuilder MIB = BuildMI(*InsertPt->getParent(), InsertPt,
MI.getDebugLoc(), get(MemOpcode));
- for (unsigned I = 0; I < OpNum; ++I)
- MIB.add(MI.getOperand(I));
+ MIB.add(MI.getOperand(0));
+ if (NeedsCommute)
+ MIB.add(MI.getOperand(2));
+ else
+ for (unsigned I = 1; I < OpNum; ++I)
+ MIB.add(MI.getOperand(I));
MIB.addFrameIndex(FrameIndex).addImm(Offset);
if (MemDesc.TSFlags & SystemZII::HasIndex)
MIB.addReg(0);
} // end namespace SystemZII
+namespace SystemZ {
+int getTwoOperandOpcode(uint16_t Opcode);
+int getTargetMemOpcode(uint16_t Opcode);
+}
+
class SystemZInstrInfo : public SystemZGenInstrInfo {
const SystemZRegisterInfo RI;
SystemZSubtarget &STI;
foldMemoryOperandImpl(MachineFunction &MF, MachineInstr &MI,
ArrayRef<unsigned> Ops,
MachineBasicBlock::iterator InsertPt, int FrameIndex,
- LiveIntervals *LIS = nullptr) const override;
+ LiveIntervals *LIS = nullptr,
+ VirtRegMap *VRM = nullptr) const override;
MachineInstr *foldMemoryOperandImpl(
MachineFunction &MF, MachineInstr &MI, ArrayRef<unsigned> Ops,
MachineBasicBlock::iterator InsertPt, MachineInstr &LoadMI,
// Addition of memory.
defm AH : BinaryRXPair<"ah", 0x4A, 0xE37A, z_sadd, GR32, asextloadi16, 2>;
- defm A : BinaryRXPair<"a", 0x5A, 0xE35A, z_sadd, GR32, load, 4>;
+ defm A : BinaryRXPairAndPseudo<"a", 0x5A, 0xE35A, z_sadd, GR32, load, 4>;
def AGH : BinaryRXY<"agh", 0xE338, z_sadd, GR64, asextloadi16, 2>,
Requires<[FeatureMiscellaneousExtensions2]>;
def AGF : BinaryRXY<"agf", 0xE318, z_sadd, GR64, asextloadi32, 4>;
- def AG : BinaryRXY<"ag", 0xE308, z_sadd, GR64, load, 8>;
+ defm AG : BinaryRXYAndPseudo<"ag", 0xE308, z_sadd, GR64, load, 8>;
// Addition to memory.
def ASI : BinarySIY<"asi", 0xEB6A, add, imm32sx8>;
Requires<[FeatureHighWord]>;
// Addition of memory.
- defm AL : BinaryRXPair<"al", 0x5E, 0xE35E, z_uadd, GR32, load, 4>;
+ defm AL : BinaryRXPairAndPseudo<"al", 0x5E, 0xE35E, z_uadd, GR32, load, 4>;
def ALGF : BinaryRXY<"algf", 0xE31A, z_uadd, GR64, azextloadi32, 4>;
- def ALG : BinaryRXY<"alg", 0xE30A, z_uadd, GR64, load, 8>;
+ defm ALG : BinaryRXYAndPseudo<"alg", 0xE30A, z_uadd, GR64, load, 8>;
// Addition to memory.
def ALSI : BinarySIY<"alsi", 0xEB6E, null_frag, imm32sx8>;
// Subtraction of memory.
defm SH : BinaryRXPair<"sh", 0x4B, 0xE37B, z_ssub, GR32, asextloadi16, 2>;
- defm S : BinaryRXPair<"s", 0x5B, 0xE35B, z_ssub, GR32, load, 4>;
+ defm S : BinaryRXPairAndPseudo<"s", 0x5B, 0xE35B, z_ssub, GR32, load, 4>;
def SGH : BinaryRXY<"sgh", 0xE339, z_ssub, GR64, asextloadi16, 2>,
Requires<[FeatureMiscellaneousExtensions2]>;
def SGF : BinaryRXY<"sgf", 0xE319, z_ssub, GR64, asextloadi32, 4>;
- def SG : BinaryRXY<"sg", 0xE309, z_ssub, GR64, load, 8>;
+ defm SG : BinaryRXYAndPseudo<"sg", 0xE309, z_ssub, GR64, load, 8>;
}
defm : SXB<z_ssub, GR64, SGFR>;
def SLGFI : BinaryRIL<"slgfi", 0xC24, z_usub, GR64, imm64zx32>;
// Subtraction of memory.
- defm SL : BinaryRXPair<"sl", 0x5F, 0xE35F, z_usub, GR32, load, 4>;
+ defm SL : BinaryRXPairAndPseudo<"sl", 0x5F, 0xE35F, z_usub, GR32, load, 4>;
def SLGF : BinaryRXY<"slgf", 0xE31B, z_usub, GR64, azextloadi32, 4>;
- def SLG : BinaryRXY<"slg", 0xE30B, z_usub, GR64, load, 8>;
+ defm SLG : BinaryRXYAndPseudo<"slg", 0xE30B, z_usub, GR64, load, 8>;
}
defm : ZXB<z_usub, GR64, SLGFR>;
// ANDs of memory.
let CCValues = 0xC, CompareZeroCCMask = 0x8 in {
- defm N : BinaryRXPair<"n", 0x54, 0xE354, and, GR32, load, 4>;
- def NG : BinaryRXY<"ng", 0xE380, and, GR64, load, 8>;
+ defm N : BinaryRXPairAndPseudo<"n", 0x54, 0xE354, and, GR32, load, 4>;
+ defm NG : BinaryRXYAndPseudo<"ng", 0xE380, and, GR64, load, 8>;
}
// AND to memory
// ORs of memory.
let CCValues = 0xC, CompareZeroCCMask = 0x8 in {
- defm O : BinaryRXPair<"o", 0x56, 0xE356, or, GR32, load, 4>;
- def OG : BinaryRXY<"og", 0xE381, or, GR64, load, 8>;
+ defm O : BinaryRXPairAndPseudo<"o", 0x56, 0xE356, or, GR32, load, 4>;
+ defm OG : BinaryRXYAndPseudo<"og", 0xE381, or, GR64, load, 8>;
}
// OR to memory
// XORs of memory.
let CCValues = 0xC, CompareZeroCCMask = 0x8 in {
- defm X : BinaryRXPair<"x",0x57, 0xE357, xor, GR32, load, 4>;
- def XG : BinaryRXY<"xg", 0xE382, xor, GR64, load, 8>;
+ defm X : BinaryRXPairAndPseudo<"x",0x57, 0xE357, xor, GR32, load, 4>;
+ defm XG : BinaryRXYAndPseudo<"xg", 0xE382, xor, GR64, load, 8>;
}
// XOR to memory
--- /dev/null
+//==---- SystemZPostRewrite.cpp - Select pseudos after RegAlloc ---*- C++ -*-=//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains a pass that is run immediately after VirtRegRewriter
+// but before MachineCopyPropagation. The purpose is to lower pseudos to
+// target instructions before any later pass might substitute a register for
+// another.
+//
+//===----------------------------------------------------------------------===//
+
+#include "SystemZ.h"
+#include "SystemZInstrInfo.h"
+#include "SystemZSubtarget.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+using namespace llvm;
+
+#define SYSTEMZ_POSTREWRITE_NAME "SystemZ Post Rewrite pass"
+
+#define DEBUG_TYPE "systemz-postrewrite"
+STATISTIC(MemFoldCopies, "Number of copies inserted before folded mem ops.");
+
+namespace llvm {
+ void initializeSystemZPostRewritePass(PassRegistry&);
+}
+
+namespace {
+
+class SystemZPostRewrite : public MachineFunctionPass {
+public:
+ static char ID;
+ SystemZPostRewrite() : MachineFunctionPass(ID) {
+ initializeSystemZPostRewritePass(*PassRegistry::getPassRegistry());
+ }
+
+ const SystemZInstrInfo *TII;
+
+ bool runOnMachineFunction(MachineFunction &Fn) override;
+
+ StringRef getPassName() const override { return SYSTEMZ_POSTREWRITE_NAME; }
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.setPreservesAll();
+ MachineFunctionPass::getAnalysisUsage(AU);
+ }
+
+private:
+ bool selectMI(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
+ MachineBasicBlock::iterator &NextMBBI);
+ bool selectMBB(MachineBasicBlock &MBB);
+};
+
+char SystemZPostRewrite::ID = 0;
+
+} // end anonymous namespace
+
+INITIALIZE_PASS(SystemZPostRewrite, "systemz-post-rewrite",
+ SYSTEMZ_POSTREWRITE_NAME, false, false)
+
+/// Returns an instance of the Post Rewrite pass.
+FunctionPass *llvm::createSystemZPostRewritePass(SystemZTargetMachine &TM) {
+ return new SystemZPostRewrite();
+}
+
+/// If MBBI references a pseudo instruction that should be selected here,
+/// do it and return true. Otherwise return false.
+bool SystemZPostRewrite::selectMI(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBI,
+ MachineBasicBlock::iterator &NextMBBI) {
+ MachineInstr &MI = *MBBI;
+ unsigned Opcode = MI.getOpcode();
+
+ // Note: If this could be done during regalloc in foldMemoryOperandImpl()
+ // while also updating the LiveIntervals, there would be no need for the
+ // MemFoldPseudo to begin with.
+ int TargetMemOpcode = SystemZ::getTargetMemOpcode(Opcode);
+ if (TargetMemOpcode != -1) {
+ MI.setDesc(TII->get(TargetMemOpcode));
+ MI.tieOperands(0, 1);
+ unsigned DstReg = MI.getOperand(0).getReg();
+ MachineOperand &SrcMO = MI.getOperand(1);
+ if (DstReg != SrcMO.getReg()) {
+ BuildMI(MBB, &MI, MI.getDebugLoc(), TII->get(SystemZ::COPY), DstReg)
+ .addReg(SrcMO.getReg());
+ SrcMO.setReg(DstReg);
+ MemFoldCopies++;
+ }
+ return true;
+ }
+
+ return false;
+}
+
+/// Iterate over the instructions in basic block MBB and select any
+/// pseudo instructions. Return true if anything was modified.
+bool SystemZPostRewrite::selectMBB(MachineBasicBlock &MBB) {
+ bool Modified = false;
+
+ MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end();
+ while (MBBI != E) {
+ MachineBasicBlock::iterator NMBBI = std::next(MBBI);
+ Modified |= selectMI(MBB, MBBI, NMBBI);
+ MBBI = NMBBI;
+ }
+
+ return Modified;
+}
+
+bool SystemZPostRewrite::runOnMachineFunction(MachineFunction &MF) {
+ TII = static_cast<const SystemZInstrInfo *>(MF.getSubtarget().getInstrInfo());
+
+ bool Modified = false;
+ for (auto &MBB : MF)
+ Modified |= selectMBB(MBB);
+
+ return Modified;
+}
+
const VirtRegMap *VRM,
const LiveRegMatrix *Matrix) const {
const MachineRegisterInfo *MRI = &MF.getRegInfo();
- const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
+ const SystemZSubtarget &Subtarget = MF.getSubtarget<SystemZSubtarget>();
+ const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo();
bool BaseImplRetVal = TargetRegisterInfo::getRegAllocationHints(
VirtReg, Order, Hints, MF, VRM, Matrix);
}
}
+ if (VRM == nullptr)
+ return BaseImplRetVal;
+
+ // Add any two address hints after any copy hints.
+ SmallSet<unsigned, 4> TwoAddrHints;
+ for (auto &Use : MRI->reg_nodbg_instructions(VirtReg))
+ if (SystemZ::getTwoOperandOpcode(Use.getOpcode()) != -1) {
+ const MachineOperand *VRRegMO = nullptr;
+ const MachineOperand *OtherMO = nullptr;
+ const MachineOperand *CommuMO = nullptr;
+ if (VirtReg == Use.getOperand(0).getReg()) {
+ VRRegMO = &Use.getOperand(0);
+ OtherMO = &Use.getOperand(1);
+ if (Use.isCommutable())
+ CommuMO = &Use.getOperand(2);
+ } else if (VirtReg == Use.getOperand(1).getReg()) {
+ VRRegMO = &Use.getOperand(1);
+ OtherMO = &Use.getOperand(0);
+ } else if (VirtReg == Use.getOperand(2).getReg() && Use.isCommutable()) {
+ VRRegMO = &Use.getOperand(2);
+ OtherMO = &Use.getOperand(0);
+ } else
+ continue;
+
+ auto tryAddHint = [&](const MachineOperand *MO) -> void {
+ unsigned Reg = MO->getReg();
+ unsigned PhysReg = isPhysicalRegister(Reg) ? Reg : VRM->getPhys(Reg);
+ if (PhysReg) {
+ if (MO->getSubReg())
+ PhysReg = getSubReg(PhysReg, MO->getSubReg());
+ if (VRRegMO->getSubReg())
+ PhysReg = getMatchingSuperReg(PhysReg, VRRegMO->getSubReg(),
+ MRI->getRegClass(VirtReg));
+ if (!MRI->isReserved(PhysReg) && !is_contained(Hints, PhysReg))
+ TwoAddrHints.insert(PhysReg);
+ }
+ };
+ tryAddHint(OtherMO);
+ if (CommuMO)
+ tryAddHint(CommuMO);
+ }
+ for (MCPhysReg OrderReg : Order)
+ if (TwoAddrHints.count(OrderReg))
+ Hints.push_back(OrderReg);
+
return BaseImplRetVal;
}
case SystemZ::VST64:
Changed |= shortenOn0(MI, SystemZ::STD);
break;
+
+ default: {
+ int TwoOperandOpcode = SystemZ::getTwoOperandOpcode(MI.getOpcode());
+ if (TwoOperandOpcode == -1)
+ break;
+
+ if ((MI.getOperand(0).getReg() != MI.getOperand(1).getReg()) &&
+ (!MI.isCommutable() ||
+ MI.getOperand(0).getReg() != MI.getOperand(2).getReg() ||
+ !TII->commuteInstruction(MI, false, 1, 2)))
+ break;
+
+ MI.setDesc(TII->get(TwoOperandOpcode));
+ MI.tieOperands(0, 1);
+ if (TwoOperandOpcode == SystemZ::SLL ||
+ TwoOperandOpcode == SystemZ::SLA ||
+ TwoOperandOpcode == SystemZ::SRL ||
+ TwoOperandOpcode == SystemZ::SRA) {
+ // These shifts only use the low 6 bits of the shift count.
+ MachineOperand &ImmMO = MI.getOperand(3);
+ ImmMO.setImm(ImmMO.getImm() & 0xfff);
+ }
+ Changed = true;
+ break;
+ }
}
LiveRegs.stepBackward(MI);
void addIRPasses() override;
bool addInstSelector() override;
bool addILPOpts() override;
+ void addPostRewrite() override;
void addPreSched2() override;
void addPreEmitPass() override;
};
return true;
}
+void SystemZPassConfig::addPostRewrite() {
+ addPass(createSystemZPostRewritePass(getSystemZTargetMachine()));
+}
+
void SystemZPassConfig::addPreSched2() {
+ // PostRewrite needs to be run at -O0 also (in which case addPostRewrite()
+ // is not called).
+ if (getOptLevel() == CodeGenOpt::None)
+ addPass(createSystemZPostRewritePass(getSystemZTargetMachine()));
+
addPass(createSystemZExpandPseudoPass(getSystemZTargetMachine()));
if (getOptLevel() != CodeGenOpt::None)
X86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF, MachineInstr &MI,
ArrayRef<unsigned> Ops,
MachineBasicBlock::iterator InsertPt,
- int FrameIndex, LiveIntervals *LIS) const {
+ int FrameIndex, LiveIntervals *LIS,
+ VirtRegMap *VRM) const {
// Check switch flag
if (NoFusing)
return nullptr;
foldMemoryOperandImpl(MachineFunction &MF, MachineInstr &MI,
ArrayRef<unsigned> Ops,
MachineBasicBlock::iterator InsertPt, int FrameIndex,
- LiveIntervals *LIS = nullptr) const override;
+ LiveIntervals *LIS = nullptr,
+ VirtRegMap *VRM = nullptr) const override;
/// foldMemoryOperand - Same as the previous version except it allows folding
/// of any load and store from / to any address, not just from a specific
}
; Test three-operand halfword immediate addition involving mixtures of low
-; and high registers. RISBHG/AIH would be OK too, instead of AHIK/RISBHG.
+; and high registers. AHIK/RISBHG would be OK too, instead of RISBHG/AIH.
define i32 @f28(i32 %old) {
; CHECK-LABEL: f28:
; CHECK: ahik [[REG1:%r[0-5]]], %r2, 14
; CHECK: stepa %r2, [[REG1]]
-; CHECK: ahik [[TMP:%r[0-5]]], [[REG1]], 254
-; CHECK: risbhg [[REG2:%r[0-5]]], [[TMP]], 0, 159, 32
+; CHECK: risbhg [[REG1]], [[REG1]], 0, 159, 32
+; CHECK: aih [[REG1]], 254
; CHECK: stepb [[REG1]], [[REG2]]
; CHECK: risbhg [[REG3:%r[0-5]]], [[REG2]], 0, 159, 0
; CHECK: aih [[REG3]], 127
; CHECK-LABEL: f1:
; CHECK: popcnt %r0, %r2
; CHECK: sllk %r1, %r0, 16
-; CHECK: ar %r1, %r0
-; CHECK: sllk %r2, %r1, 8
-; CHECK: ar %r2, %r1
-; CHECK: srl %r2, 24
+; CHECK: ar %r0, %r1
+; CHECK: sllk %r1, %r0, 8
+; CHECK: ar %r0, %r1
+; CHECK: srlk %r2, %r0, 24
; CHECK: br %r14
%popcnt = call i32 @llvm.ctpop.i32(i32 %a)
; CHECK-LABEL: f2:
; CHECK: llhr %r0, %r2
; CHECK: popcnt %r0, %r0
-; CHECK: risblg %r2, %r0, 16, 151, 8
-; CHECK: ar %r2, %r0
-; CHECK: srl %r2, 8
+; CHECK: risblg %r1, %r0, 16, 151, 8
+; CHECK: ar %r0, %r1
+; CHECK: srlk %r2, %r0, 8
; CHECK: br %r14
%and = and i32 %a, 65535
%popcnt = call i32 @llvm.ctpop.i32(i32 %and)
; CHECK-LABEL: f4:
; CHECK: popcnt %r0, %r2
; CHECK: sllg %r1, %r0, 32
-; CHECK: agr %r1, %r0
-; CHECK: sllg %r0, %r1, 16
+; CHECK: agr %r0, %r1
+; CHECK: sllg %r1, %r0, 16
; CHECK: agr %r0, %r1
; CHECK: sllg %r1, %r0, 8
-; CHECK: agr %r1, %r0
-; CHECK: srlg %r2, %r1, 56
+; CHECK: agr %r0, %r1
+; CHECK: srlg %r2, %r0, 56
; CHECK: br %r14
%popcnt = call i64 @llvm.ctpop.i64(i64 %a)
ret i64 %popcnt
; CHECK: llghr %r0, %r2
; CHECK: popcnt %r0, %r0
; CHECK: risbg %r1, %r0, 48, 183, 8
-; CHECK: agr %r1, %r0
-; CHECK: srlg %r2, %r1, 8
+; CHECK: agr %r0, %r1
+; CHECK: srlg %r2, %r0, 8
; CHECK: br %r14
%and = and i64 %a, 65535
%popcnt = call i64 @llvm.ctpop.i64(i64 %and)
; Test 64-bit addition in which the second operand is variable.
;
-; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z10 | FileCheck %s
-; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z196 | FileCheck %s
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z10 | FileCheck %s --check-prefixes=CHECK,Z10
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z196 | FileCheck %s --check-prefixes=CHECK,Z196
declare i64 @foo()
}
; Check that additions of spilled values can use AG rather than AGR.
+; Note: Z196 is suboptimal with one unfolded reload.
define i64 @f9(i64 *%ptr0) {
; CHECK-LABEL: f9:
; CHECK: brasl %r14, foo@PLT
-; CHECK: ag %r2, 160(%r15)
+; Z10: ag %r2, 168(%r15)
+; Z196: ag %r0, 168(%r15)
; CHECK: br %r14
%ptr1 = getelementptr i64, i64 *%ptr0, i64 2
%ptr2 = getelementptr i64, i64 *%ptr0, i64 4
--- /dev/null
+; Test of subtraction that involves a constant as the first operand
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z196 | FileCheck %s
+
+; Check highest 16-bit signed int immediate value.
+define i64 @f1(i64 %a) {
+; CHECK-LABEL: f1:
+; CHECK: lghi %r0, 32767
+; CHECK: sgrk %r2, %r0, %r2
+; CHECK: br %r14
+ %sub = sub i64 32767, %a
+ ret i64 %sub
+}
+; Check highest 32-bit signed int immediate value.
+define i64 @f2(i64 %a) {
+; CHECK-LABEL: f2:
+; CHECK: lgfi %r0, 2147483647
+; CHECK: sgrk %r2, %r0, %r2
+; CHECK: br %r14
+ %sub = sub i64 2147483647, %a
+ ret i64 %sub
+}
; CHECK-LABEL: %bb.0:
; CHECK-NEXT: # kill
; CHECK-NEXT: llghr %r0, %r2
-; CHECK-NEXT: flogr %r2, %r0
-; CHECK-NEXT: aghi %r2, -32
-; CHECK-NEXT: ahi %r2, -16
-; CHECK-NEXT: # kill
+; CHECK-NEXT: flogr %r0, %r0
+; CHECK-NEXT: aghi %r0, -32
+; CHECK-NEXT: ahik %r2, %r0, -16
; CHECK-NEXT: br %r14
%1 = tail call i16 @llvm.ctlz.i16(i16 %arg, i1 false)
ret i16 %1
; CHECK-LABEL: %bb.0:
; CHECK-NEXT: # kill
; CHECK-NEXT: llghr %r0, %r2
-; CHECK-NEXT: flogr %r2, %r0
-; CHECK-NEXT: aghi %r2, -32
-; CHECK-NEXT: ahi %r2, -16
-; CHECK-NEXT: # kill
+; CHECK-NEXT: flogr %r0, %r0
+; CHECK-NEXT: aghi %r0, -32
+; CHECK-NEXT: ahik %r2, %r0, -16
; CHECK-NEXT: br %r14
%1 = tail call i16 @llvm.ctlz.i16(i16 %arg, i1 true)
ret i16 %1
; CHECK-LABEL: %bb.0:
; CHECK-NEXT: # kill
; CHECK-NEXT: llgcr %r0, %r2
-; CHECK-NEXT: flogr %r2, %r0
-; CHECK-NEXT: aghi %r2, -32
-; CHECK-NEXT: ahi %r2, -24
-; CHECK-NEXT: # kill
+; CHECK-NEXT: flogr %r0, %r0
+; CHECK-NEXT: aghi %r0, -32
+; CHECK-NEXT: ahik %r2, %r0, -24
; CHECK-NEXT: br %r14
%1 = tail call i8 @llvm.ctlz.i8(i8 %arg, i1 false)
ret i8 %1
; CHECK-LABEL: %bb.0:
; CHECK-NEXT: # kill
; CHECK-NEXT: llgcr %r0, %r2
-; CHECK-NEXT: flogr %r2, %r0
-; CHECK-NEXT: aghi %r2, -32
-; CHECK-NEXT: ahi %r2, -24
-; CHECK-NEXT: # kill
+; CHECK-NEXT: flogr %r0, %r0
+; CHECK-NEXT: aghi %r0, -32
+; CHECK-NEXT: ahik %r2, %r0, -24
; CHECK-NEXT: br %r14
%1 = tail call i8 @llvm.ctlz.i8(i8 %arg, i1 true)
ret i8 %1
; CHECK-NEXT: stmg %r14, %r15, 112(%r15)
; CHECK-NEXT: .cfi_offset %r14, -48
; CHECK-NEXT: .cfi_offset %r15, -40
-; CHECK-NEXT: vlgvf %r3, %v26, 1
-; CHECK-NEXT: vlgvf %r1, %v26, 2
-; CHECK-NEXT: risbgn %r4, %r3, 0, 129, 62
-; CHECK-NEXT: rosbg %r4, %r1, 2, 32, 31
+; CHECK-DAG: vlgvf [[REG11:%r[0-9]+]], %v26, 1
+; CHECK-DAG: vlgvf [[REG12:%r[0-9]+]], %v26, 2
+; CHECK-DAG: risbgn [[REG13:%r[0-9]+]], [[REG11]], 0, 129, 62
+; CHECK-DAG: rosbg [[REG13]], [[REG12]], 2, 32, 31
; CHECK-DAG: vlgvf %r0, %v26, 3
-; CHECK-DAG: rosbg %r4, %r0, 33, 63, 0
+; CHECK-DAG: rosbg [[REG13]], %r0, 33, 63, 0
; CHECK-DAG: stc %r0, 30(%r2)
-; CHECK-DAG: srl %r0, 8
+; CHECK-DAG: srlk %r1, %r0, 8
; CHECK-DAG: vlgvf [[REG0:%r[0-9]+]], %v24, 1
; CHECK-DAG: vlgvf [[REG1:%r[0-9]+]], %v24, 0
-; CHECK-DAG: sth %r0, 28(%r2)
+; CHECK-DAG: sth %r1, 28(%r2)
; CHECK-DAG: vlgvf [[REG2:%r[0-9]+]], %v24, 2
; CHECK-DAG: risbgn [[REG3:%r[0-9]+]], [[REG0]], 0, 133, 58
; CHECK-DAG: rosbg [[REG3]], [[REG2]], 6, 36, 27
; CHECK-DAG: rosbg [[REG3]], [[REG5]], 37, 63, 60
; CHECK-DAG: sllg [[REG6:%r[0-9]+]], [[REG4]], 8
; CHECK-DAG: rosbg [[REG6]], [[REG3]], 56, 63, 8
-; CHECK-NEXT: stg [[REG6]], 0(%r2)
-; CHECK-NEXT: srlg [[REG7:%r[0-9]+]], %r4, 24
-; CHECK-NEXT: st [[REG7]], 24(%r2)
-; CHECK-NEXT: vlgvf [[REG8:%r[0-9]+]], %v26, 0
-; CHECK-NEXT: risbgn [[REG10:%r[0-9]+]], [[REG5]], 0, 131, 60
-; CHECK-NEXT: rosbg [[REG10]], [[REG8]], 4, 34, 29
-; CHECK-NEXT: sllg [[REG9:%r[0-9]+]], [[REG3]], 8
-; CHECK-NEXT: rosbg [[REG10]], %r3, 35, 63, 62
-; CHECK-NEXT: rosbg [[REG9]], [[REG10]], 56, 63, 8
-; CHECK-NEXT: stg [[REG9]], 8(%r2)
-; CHECK-NEXT: sllg %r0, [[REG10]], 8
-; CHECK-NEXT: rosbg %r0, %r4, 56, 63, 8
+; CHECK-DAG: stg [[REG6]], 0(%r2)
+; CHECK-DAG: srlg [[REG7:%r[0-9]+]], [[REG13]], 24
+; CHECK-DAG: st [[REG7]], 24(%r2)
+; CHECK-DAG: vlgvf [[REG8:%r[0-9]+]], %v26, 0
+; CHECK-DAG: risbgn [[REG10:%r[0-9]+]], [[REG5]], 0, 131, 60
+; CHECK-DAG: rosbg [[REG10]], [[REG8]], 4, 34, 29
+; CHECK-DAG: sllg [[REG9:%r[0-9]+]], [[REG3]], 8
+; CHECK-DAG: rosbg [[REG10]], [[REG11]], 35, 63, 62
+; CHECK-DAG: rosbg [[REG9]], [[REG10]], 56, 63, 8
+; CHECK-DAG: stg [[REG9]], 8(%r2)
+; CHECK-DAG: sllg %r0, [[REG10]], 8
+; CHECK-DAG: rosbg %r0, [[REG13]], 56, 63, 8
; CHECK-NEXT: stg %r0, 16(%r2)
; CHECK-NEXT: lmg %r14, %r15, 112(%r15)
; CHECK-NEXT: br %r14
; CHECK-NOT: vmrh
; CHECK: ar {{%r[0-5]}},
; CHECK: ar {{%r[0-5]}},
-; CHECK: or %r2,
+; CHECK: ork %r2,
; CHECK: br %r14
%vec0 = insertelement <2 x double> undef, double %scalar0, i32 0
%vec1 = insertelement <2 x double> undef, double %scalar1, i32 0