SystemZCallingConv.cpp
SystemZConstantPoolValue.cpp
SystemZElimCompare.cpp
+ SystemZExpandPseudo.cpp
SystemZFrameLowering.cpp
SystemZHazardRecognizer.cpp
SystemZISelDAGToDAG.cpp
FunctionPass *createSystemZISelDag(SystemZTargetMachine &TM,
CodeGenOpt::Level OptLevel);
FunctionPass *createSystemZElimComparePass(SystemZTargetMachine &TM);
+FunctionPass *createSystemZExpandPseudoPass(SystemZTargetMachine &TM);
FunctionPass *createSystemZShortenInstPass(SystemZTargetMachine &TM);
FunctionPass *createSystemZLongBranchPass(SystemZTargetMachine &TM);
FunctionPass *createSystemZLDCleanupPass(SystemZTargetMachine &TM);
--- /dev/null
+//==-- SystemZExpandPseudo.cpp - Expand pseudo instructions -------*- C++ -*-=//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains a pass that expands pseudo instructions into target
+// instructions to allow proper scheduling and other late optimizations. This
+// pass should be run after register allocation but before the post-regalloc
+// scheduling pass.
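+//
+// (At present the only pseudo expanded here is LOCRMux, whose general form
+// may require new basic blocks, which expandPostRAPseudo is not allowed to
+// create.)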
+//
+//===----------------------------------------------------------------------===//
+
+#include "SystemZ.h"
+#include "SystemZInstrInfo.h"
+#include "SystemZSubtarget.h"
+#include "llvm/CodeGen/LivePhysRegs.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+using namespace llvm;
+
+#define SYSTEMZ_EXPAND_PSEUDO_NAME "SystemZ pseudo instruction expansion pass"
+
+namespace llvm {
+ void initializeSystemZExpandPseudoPass(PassRegistry&);
+}
+
+namespace {
+class SystemZExpandPseudo : public MachineFunctionPass {
+public:
+ static char ID;
+ SystemZExpandPseudo() : MachineFunctionPass(ID) {
+ initializeSystemZExpandPseudoPass(*PassRegistry::getPassRegistry());
+ }
+
+ const SystemZInstrInfo *TII;
+
+ bool runOnMachineFunction(MachineFunction &Fn) override;
+
+ StringRef getPassName() const override { return SYSTEMZ_EXPAND_PSEUDO_NAME; }
+
+private:
+ bool expandMBB(MachineBasicBlock &MBB);
+ bool expandMI(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
+ MachineBasicBlock::iterator &NextMBBI);
+ bool expandLOCRMux(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
+ MachineBasicBlock::iterator &NextMBBI);
+};
+char SystemZExpandPseudo::ID = 0;
+}
+
+INITIALIZE_PASS(SystemZExpandPseudo, "systemz-expand-pseudo",
+ SYSTEMZ_EXPAND_PSEUDO_NAME, false, false)
+
+/// \brief Returns an instance of the pseudo instruction expansion pass.
+FunctionPass *llvm::createSystemZExpandPseudoPass(SystemZTargetMachine &TM) {
+ return new SystemZExpandPseudo();
+}
+
+// MI is a load-register-on-condition pseudo instruction that could not be
+// handled as a single hardware instruction. Replace it by a branch sequence.
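+//
+// A sketch of the expansion (register names illustrative):
+//   %dest = LOCRMux %dest, %src, CCValid, CCMask
+// becomes
+//   BRC CCValid, CCMask ^ CCValid, RestMBB   (branch if condition is false)
+// MoveMBB:
+//   %dest = COPY %src                        (falls through to RestMBB)
+// RestMBB:
+//   ...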
+bool SystemZExpandPseudo::expandLOCRMux(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBI,
+ MachineBasicBlock::iterator &NextMBBI) {
+ MachineFunction &MF = *MBB.getParent();
+ const BasicBlock *BB = MBB.getBasicBlock();
+ MachineInstr &MI = *MBBI;
+ DebugLoc DL = MI.getDebugLoc();
+ unsigned DestReg = MI.getOperand(0).getReg();
+ unsigned SrcReg = MI.getOperand(2).getReg();
+ unsigned CCValid = MI.getOperand(3).getImm();
+ unsigned CCMask = MI.getOperand(4).getImm();
+
+ LivePhysRegs LiveRegs(&TII->getRegisterInfo());
+ LiveRegs.addLiveOuts(MBB);
+ for (auto I = std::prev(MBB.end()); I != MBBI; --I)
+ LiveRegs.stepBackward(*I);
+
+ // Splice MBB at MI, moving the rest of the block into RestMBB.
+ MachineBasicBlock *RestMBB = MF.CreateMachineBasicBlock(BB);
+ MF.insert(std::next(MachineFunction::iterator(MBB)), RestMBB);
+ RestMBB->splice(RestMBB->begin(), &MBB, MI, MBB.end());
+ RestMBB->transferSuccessors(&MBB);
+ for (auto I = LiveRegs.begin(); I != LiveRegs.end(); ++I)
+ RestMBB->addLiveIn(*I);
+
+ // Create a new block MoveMBB to hold the move instruction.
+ MachineBasicBlock *MoveMBB = MF.CreateMachineBasicBlock(BB);
+ MF.insert(std::next(MachineFunction::iterator(MBB)), MoveMBB);
+ MoveMBB->addLiveIn(SrcReg);
+ for (auto I = LiveRegs.begin(); I != LiveRegs.end(); ++I)
+ MoveMBB->addLiveIn(*I);
+
+ // At the end of MBB, create a conditional branch to RestMBB if the
+ // condition is false, otherwise fall through to MoveMBB.
+ BuildMI(&MBB, DL, TII->get(SystemZ::BRC))
+ .addImm(CCValid).addImm(CCMask ^ CCValid).addMBB(RestMBB);
+ MBB.addSuccessor(RestMBB);
+ MBB.addSuccessor(MoveMBB);
+
+ // In MoveMBB, emit an instruction to move SrcReg into DestReg,
+ // then fall through to RestMBB.
+ TII->copyPhysReg(*MoveMBB, MoveMBB->end(), DL, DestReg, SrcReg,
+ MI.getOperand(2).isKill());
+ MoveMBB->addSuccessor(RestMBB);
+
+ NextMBBI = MBB.end();
+ MI.eraseFromParent();
+ return true;
+}
+
+/// \brief If MBBI references a pseudo instruction that should be expanded here,
+/// do the expansion and return true. Otherwise return false.
+bool SystemZExpandPseudo::expandMI(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBI,
+ MachineBasicBlock::iterator &NextMBBI) {
+ MachineInstr &MI = *MBBI;
+ switch (MI.getOpcode()) {
+ case SystemZ::LOCRMux:
+ return expandLOCRMux(MBB, MBBI, NextMBBI);
+ default:
+ break;
+ }
+ return false;
+}
+
+/// \brief Iterate over the instructions in basic block MBB and expand any
+/// pseudo instructions. Return true if anything was modified.
+bool SystemZExpandPseudo::expandMBB(MachineBasicBlock &MBB) {
+ bool Modified = false;
+
+ MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end();
+ while (MBBI != E) {
+ MachineBasicBlock::iterator NMBBI = std::next(MBBI);
+ Modified |= expandMI(MBB, MBBI, NMBBI);
+ MBBI = NMBBI;
+ }
+
+ return Modified;
+}
+
+bool SystemZExpandPseudo::runOnMachineFunction(MachineFunction &MF) {
+ TII = static_cast<const SystemZInstrInfo *>(MF.getSubtarget().getInstrInfo());
+
+ bool Modified = false;
+ for (auto &MBB : MF)
+ Modified |= expandMBB(MBB);
+ return Modified;
+}
+
SDValue Op0 = Node->getOperand(0);
SDValue Op1 = Node->getOperand(1);
// Prefer to put any load first, so that it can be matched as a
- // conditional load.
- if (Op1.getOpcode() == ISD::LOAD && Op0.getOpcode() != ISD::LOAD) {
+ // conditional load. Likewise for constants in range for LOCHI.
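+  // For example (a sketch): SELECT_CCMASK (%x, 42, valid, cc) becomes
+  // SELECT_CCMASK (42, %x, valid, cc ^ valid), putting the in-range constant
+  // where the LOCHIMux/LOCGHI patterns can later fold it.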
+ if ((Op1.getOpcode() == ISD::LOAD && Op0.getOpcode() != ISD::LOAD) ||
+ (Subtarget->hasLoadStoreOnCond2() &&
+ Node->getValueType(0).isInteger() &&
+ Op1.getOpcode() == ISD::Constant &&
+ isInt<16>(cast<ConstantSDNode>(Op1)->getSExtValue()) &&
+ !(Op0.getOpcode() == ISD::Constant &&
+ isInt<16>(cast<ConstantSDNode>(Op0)->getSExtValue())))) {
SDValue CCValid = Node->getOperand(2);
SDValue CCMask = Node->getOperand(3);
uint64_t ConstCCValid =
// Implement EmitInstrWithCustomInserter for pseudo Select* instruction MI.
MachineBasicBlock *
SystemZTargetLowering::emitSelect(MachineInstr &MI,
- MachineBasicBlock *MBB) const {
+ MachineBasicBlock *MBB,
+ unsigned LOCROpcode) const {
const SystemZInstrInfo *TII =
static_cast<const SystemZInstrInfo *>(Subtarget.getInstrInfo());
unsigned CCMask = MI.getOperand(4).getImm();
DebugLoc DL = MI.getDebugLoc();
+ // Use LOCROpcode if possible.
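+  // (E.g. Select32 becomes a single LOCR and Select64 a single LOCGR,
+  // rather than the branch sequence built below.)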
+ if (LOCROpcode && Subtarget.hasLoadStoreOnCond()) {
+ BuildMI(*MBB, MI, DL, TII->get(LOCROpcode), DestReg)
+ .addReg(FalseReg).addReg(TrueReg)
+ .addImm(CCValid).addImm(CCMask);
+ MI.eraseFromParent();
+ return MBB;
+ }
+
MachineBasicBlock *StartMBB = MBB;
MachineBasicBlock *JoinMBB = splitBlockBefore(MI, MBB);
MachineBasicBlock *FalseMBB = emitBlockAfter(StartMBB);
MachineInstr &MI, MachineBasicBlock *MBB) const {
switch (MI.getOpcode()) {
case SystemZ::Select32Mux:
+ return emitSelect(MI, MBB,
+                      Subtarget.hasLoadStoreOnCond2() ? SystemZ::LOCRMux : 0);
case SystemZ::Select32:
- case SystemZ::SelectF32:
+ return emitSelect(MI, MBB, SystemZ::LOCR);
case SystemZ::Select64:
+ return emitSelect(MI, MBB, SystemZ::LOCGR);
+ case SystemZ::SelectF32:
case SystemZ::SelectF64:
case SystemZ::SelectF128:
- return emitSelect(MI, MBB);
+ return emitSelect(MI, MBB, 0);
case SystemZ::CondStore8Mux:
return emitCondStore(MI, MBB, SystemZ::STCMux, 0, false);
  case SystemZ::CondStore8MuxInv:
    return emitCondStore(MI, MBB, SystemZ::STCMux, 0, true);
  case SystemZ::CondStore16Mux:
    return emitCondStore(MI, MBB, SystemZ::STHMux, 0, false);
case SystemZ::CondStore16MuxInv:
return emitCondStore(MI, MBB, SystemZ::STHMux, 0, true);
+ case SystemZ::CondStore32Mux:
+ return emitCondStore(MI, MBB, SystemZ::STMux, SystemZ::STOCMux, false);
+ case SystemZ::CondStore32MuxInv:
+ return emitCondStore(MI, MBB, SystemZ::STMux, SystemZ::STOCMux, true);
case SystemZ::CondStore8:
return emitCondStore(MI, MBB, SystemZ::STC, 0, false);
case SystemZ::CondStore8Inv:
MachineBasicBlock *Target) const;
// Implement EmitInstrWithCustomInserter for individual operation types.
- MachineBasicBlock *emitSelect(MachineInstr &MI, MachineBasicBlock *BB) const;
+ MachineBasicBlock *emitSelect(MachineInstr &MI, MachineBasicBlock *BB,
+ unsigned LOCROpcode) const;
MachineBasicBlock *emitCondStore(MachineInstr &MI, MachineBasicBlock *BB,
unsigned StoreOpcode, unsigned STOCOpcode,
bool Invert) const;
let OpType = "reg";
}
-// These instructions are generated by if conversion. The old value of R1
-// is added as an implicit use.
-class CondUnaryRRF<string mnemonic, bits<16> opcode, RegisterOperand cls1,
- RegisterOperand cls2>
- : InstRRFc<opcode, (outs cls1:$R1), (ins cls2:$R2, cond4:$valid, cond4:$M3),
- mnemonic#"$M3\t$R1, $R2", []> {
- let CCMaskLast = 1;
-}
-
-class CondUnaryRIE<string mnemonic, bits<16> opcode, RegisterOperand cls,
- Immediate imm>
- : InstRIEg<opcode, (outs cls:$R1), (ins imm:$I2, cond4:$valid, cond4:$M3),
- mnemonic#"$M3\t$R1, $I2", []> {
- let CCMaskLast = 1;
-}
-
-// Like CondUnaryRRF, but used for the raw assembly form. The condition-code
-// mask is the third operand rather than being part of the mnemonic.
-class AsmCondUnaryRRF<string mnemonic, bits<16> opcode, RegisterOperand cls1,
- RegisterOperand cls2>
- : InstRRFc<opcode, (outs cls1:$R1),
- (ins cls1:$R1src, cls2:$R2, imm32zx4:$M3),
- mnemonic#"\t$R1, $R2, $M3", []> {
- let Constraints = "$R1 = $R1src";
- let DisableEncoding = "$R1src";
-}
-
-class AsmCondUnaryRIE<string mnemonic, bits<16> opcode, RegisterOperand cls,
- Immediate imm>
- : InstRIEg<opcode, (outs cls:$R1),
- (ins cls:$R1src, imm:$I2, imm32zx4:$M3),
- mnemonic#"\t$R1, $I2, $M3", []> {
- let Constraints = "$R1 = $R1src";
- let DisableEncoding = "$R1src";
-}
-
-// Like CondUnaryRRF, but with a fixed CC mask.
-class FixedCondUnaryRRF<CondVariant V, string mnemonic, bits<16> opcode,
- RegisterOperand cls1, RegisterOperand cls2>
- : InstRRFc<opcode, (outs cls1:$R1), (ins cls1:$R1src, cls2:$R2),
- mnemonic#V.suffix#"\t$R1, $R2", []> {
- let Constraints = "$R1 = $R1src";
- let DisableEncoding = "$R1src";
- let isAsmParserOnly = V.alternate;
- let M3 = V.ccmask;
-}
-
-class FixedCondUnaryRIE<CondVariant V, string mnemonic, bits<16> opcode,
- RegisterOperand cls, Immediate imm>
- : InstRIEg<opcode, (outs cls:$R1), (ins cls:$R1src, imm:$I2),
- mnemonic#V.suffix#"\t$R1, $I2", []> {
- let Constraints = "$R1 = $R1src";
- let DisableEncoding = "$R1src";
- let isAsmParserOnly = V.alternate;
- let M3 = V.ccmask;
-}
-
-multiclass CondUnaryRRFPair<string mnemonic, bits<16> opcode,
- RegisterOperand cls1, RegisterOperand cls2> {
- let isCodeGenOnly = 1 in
- def "" : CondUnaryRRF<mnemonic, opcode, cls1, cls2>;
- def Asm : AsmCondUnaryRRF<mnemonic, opcode, cls1, cls2>;
-}
-
-multiclass CondUnaryRIEPair<string mnemonic, bits<16> opcode,
- RegisterOperand cls, Immediate imm> {
- let isCodeGenOnly = 1 in
- def "" : CondUnaryRIE<mnemonic, opcode, cls, imm>;
- def Asm : AsmCondUnaryRIE<mnemonic, opcode, cls, imm>;
-}
-
class UnaryRI<string mnemonic, bits<12> opcode, SDPatternOperator operator,
RegisterOperand cls, Immediate imm>
: InstRIa<opcode, (outs cls:$R1), (ins imm:$I2),
let M4 = 0;
}
+class CondBinaryRRF<string mnemonic, bits<16> opcode, RegisterOperand cls1,
+ RegisterOperand cls2>
+ : InstRRFc<opcode, (outs cls1:$R1),
+ (ins cls1:$R1src, cls2:$R2, cond4:$valid, cond4:$M3),
+ mnemonic#"$M3\t$R1, $R2", []> {
+ let Constraints = "$R1 = $R1src";
+ let DisableEncoding = "$R1src";
+ let CCMaskLast = 1;
+}
+
+// Like CondBinaryRRF, but used for the raw assembly form. The condition-code
+// mask is the third operand rather than being part of the mnemonic.
+class AsmCondBinaryRRF<string mnemonic, bits<16> opcode, RegisterOperand cls1,
+ RegisterOperand cls2>
+ : InstRRFc<opcode, (outs cls1:$R1),
+ (ins cls1:$R1src, cls2:$R2, imm32zx4:$M3),
+ mnemonic#"\t$R1, $R2, $M3", []> {
+ let Constraints = "$R1 = $R1src";
+ let DisableEncoding = "$R1src";
+}
+
+// Like CondBinaryRRF, but with a fixed CC mask.
+class FixedCondBinaryRRF<CondVariant V, string mnemonic, bits<16> opcode,
+ RegisterOperand cls1, RegisterOperand cls2>
+ : InstRRFc<opcode, (outs cls1:$R1), (ins cls1:$R1src, cls2:$R2),
+ mnemonic#V.suffix#"\t$R1, $R2", []> {
+ let Constraints = "$R1 = $R1src";
+ let DisableEncoding = "$R1src";
+ let isAsmParserOnly = V.alternate;
+ let M3 = V.ccmask;
+}
+
+multiclass CondBinaryRRFPair<string mnemonic, bits<16> opcode,
+ RegisterOperand cls1, RegisterOperand cls2> {
+ let isCodeGenOnly = 1 in
+ def "" : CondBinaryRRF<mnemonic, opcode, cls1, cls2>;
+ def Asm : AsmCondBinaryRRF<mnemonic, opcode, cls1, cls2>;
+}
+
class BinaryRI<string mnemonic, bits<12> opcode, SDPatternOperator operator,
RegisterOperand cls, Immediate imm>
: InstRIa<opcode, (outs cls:$R1), (ins cls:$R1src, imm:$I2),
}
}
+class CondBinaryRIE<string mnemonic, bits<16> opcode, RegisterOperand cls,
+ Immediate imm>
+ : InstRIEg<opcode, (outs cls:$R1),
+ (ins cls:$R1src, imm:$I2, cond4:$valid, cond4:$M3),
+ mnemonic#"$M3\t$R1, $I2",
+ [(set cls:$R1, (z_select_ccmask imm:$I2, cls:$R1src,
+ cond4:$valid, cond4:$M3))]> {
+ let Constraints = "$R1 = $R1src";
+ let DisableEncoding = "$R1src";
+ let CCMaskLast = 1;
+}
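+
+// (In CondBinaryRIE's pattern above, $I2 is the value selected when the
+// condition holds and $R1src is kept otherwise, matching LOCHI's behavior.)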
+
+// Like CondBinaryRIE, but used for the raw assembly form. The condition-code
+// mask is the third operand rather than being part of the mnemonic.
+class AsmCondBinaryRIE<string mnemonic, bits<16> opcode, RegisterOperand cls,
+ Immediate imm>
+ : InstRIEg<opcode, (outs cls:$R1),
+ (ins cls:$R1src, imm:$I2, imm32zx4:$M3),
+ mnemonic#"\t$R1, $I2, $M3", []> {
+ let Constraints = "$R1 = $R1src";
+ let DisableEncoding = "$R1src";
+}
+
+// Like CondBinaryRIE, but with a fixed CC mask.
+class FixedCondBinaryRIE<CondVariant V, string mnemonic, bits<16> opcode,
+ RegisterOperand cls, Immediate imm>
+ : InstRIEg<opcode, (outs cls:$R1), (ins cls:$R1src, imm:$I2),
+ mnemonic#V.suffix#"\t$R1, $I2", []> {
+ let Constraints = "$R1 = $R1src";
+ let DisableEncoding = "$R1src";
+ let isAsmParserOnly = V.alternate;
+ let M3 = V.ccmask;
+}
+
+multiclass CondBinaryRIEPair<string mnemonic, bits<16> opcode,
+ RegisterOperand cls, Immediate imm> {
+ let isCodeGenOnly = 1 in
+ def "" : CondBinaryRIE<mnemonic, opcode, cls, imm>;
+ def Asm : AsmCondBinaryRIE<mnemonic, opcode, cls, imm>;
+}
+
class BinaryRIL<string mnemonic, bits<12> opcode, SDPatternOperator operator,
RegisterOperand cls, Immediate imm>
: InstRILa<opcode, (outs cls:$R1), (ins cls:$R1src, imm:$I2),
let AccessBytes = bytes;
}
+// Like CondBinaryRRF, but expanded after RA depending on the choice of
+// register.
+class CondBinaryRRFPseudo<RegisterOperand cls1, RegisterOperand cls2>
+ : Pseudo<(outs cls1:$R1),
+ (ins cls1:$R1src, cls2:$R2, cond4:$valid, cond4:$M3), []> {
+ let Constraints = "$R1 = $R1src";
+ let DisableEncoding = "$R1src";
+ let CCMaskLast = 1;
+}
+
+// Like CondBinaryRIE, but expanded after RA depending on the choice of
+// register.
+class CondBinaryRIEPseudo<RegisterOperand cls, Immediate imm>
+ : Pseudo<(outs cls:$R1),
+ (ins cls:$R1src, imm:$I2, cond4:$valid, cond4:$M3),
+ [(set cls:$R1, (z_select_ccmask imm:$I2, cls:$R1src,
+ cond4:$valid, cond4:$M3))]> {
+ let Constraints = "$R1 = $R1src";
+ let DisableEncoding = "$R1src";
+ let CCMaskLast = 1;
+}
+
+// Like CondUnaryRSY, but expanded after RA depending on the choice of
+// register.
+class CondUnaryRSYPseudo<SDPatternOperator operator, RegisterOperand cls,
+ bits<5> bytes, AddressingMode mode = bdaddr20only>
+ : Pseudo<(outs cls:$R1),
+ (ins cls:$R1src, mode:$BD2, cond4:$valid, cond4:$R3),
+ [(set cls:$R1,
+ (z_select_ccmask (operator mode:$BD2), cls:$R1src,
+ cond4:$valid, cond4:$R3))]> {
+ let Constraints = "$R1 = $R1src";
+ let DisableEncoding = "$R1src";
+ let mayLoad = 1;
+ let AccessBytes = bytes;
+ let CCMaskLast = 1;
+}
+
+// Like CondStoreRSY, but expanded after RA depending on the choice of
+// register.
+class CondStoreRSYPseudo<RegisterOperand cls, bits<5> bytes,
+ AddressingMode mode = bdaddr20only>
+ : Pseudo<(outs), (ins cls:$R1, mode:$BD2, cond4:$valid, cond4:$R3), []> {
+ let mayStore = 1;
+ let AccessBytes = bytes;
+ let CCMaskLast = 1;
+}
+
// Like StoreRXY, but expanded after RA depending on the choice of register.
class StoreRXYPseudo<SDPatternOperator operator, RegisterOperand cls,
bits<5> bytes, AddressingMode mode = bdxaddr20only>
MI.setDesc(get(Opcode));
}
+// MI is a load-on-condition pseudo instruction with a single register
+// (source or destination) operand. Replace it with LowOpcode if the
+// register is a low GR32 and HighOpcode if the register is a high GR32.
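+// For example (illustrative), LOCMux becomes LOC for a low destination and
+// LOCFH for a high one; STOCMux likewise becomes STOC or STOCFH.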
+void SystemZInstrInfo::expandLOCPseudo(MachineInstr &MI, unsigned LowOpcode,
+ unsigned HighOpcode) const {
+ unsigned Reg = MI.getOperand(0).getReg();
+ unsigned Opcode = isHighReg(Reg) ? HighOpcode : LowOpcode;
+ MI.setDesc(get(Opcode));
+}
+
+// MI is a load-register-on-condition pseudo instruction. Replace it with
+// LowOpcode if source and destination are both low GR32s and HighOpcode if
+// source and destination are both high GR32s.
+void SystemZInstrInfo::expandLOCRPseudo(MachineInstr &MI, unsigned LowOpcode,
+ unsigned HighOpcode) const {
+ unsigned DestReg = MI.getOperand(0).getReg();
+ unsigned SrcReg = MI.getOperand(2).getReg();
+ bool DestIsHigh = isHighReg(DestReg);
+ bool SrcIsHigh = isHighReg(SrcReg);
+
+ if (!DestIsHigh && !SrcIsHigh)
+ MI.setDesc(get(LowOpcode));
+ else if (DestIsHigh && SrcIsHigh)
+ MI.setDesc(get(HighOpcode));
+
+ // If we were unable to implement the pseudo with a single instruction, we
+ // need to convert it back into a branch sequence. This cannot be done here
+ // since the caller of expandPostRAPseudo does not handle changes to the CFG
+  // correctly. This change is deferred to the SystemZExpandPseudo pass.
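+  //
+  // (Illustrative example: a low-GR32 destination with a high-GR32 source
+  // matches neither LOCR nor LOCFHR, so the pseudo is left unchanged here.)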
+}
+
// MI is an RR-style pseudo instruction that zero-extends the low Size bits
// of one GRX32 into another. Replace it with LowOpcode if both operands
// are low registers, otherwise use RISB[LH]G.
.addImm(32 - Size).addImm(128 + 31).addImm(Rotate);
}
+
+MachineInstr *SystemZInstrInfo::commuteInstructionImpl(MachineInstr &MI,
+ bool NewMI,
+ unsigned OpIdx1,
+ unsigned OpIdx2) const {
+ auto cloneIfNew = [NewMI](MachineInstr &MI) -> MachineInstr & {
+ if (NewMI)
+ return *MI.getParent()->getParent()->CloneMachineInstr(&MI);
+ return MI;
+ };
+
+ switch (MI.getOpcode()) {
+ case SystemZ::LOCRMux:
+ case SystemZ::LOCFHR:
+ case SystemZ::LOCR:
+ case SystemZ::LOCGR: {
+ auto &WorkingMI = cloneIfNew(MI);
+ // Invert condition.
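+    // A sketch (values illustrative, with CCValid 14 for integer compares):
+    //   %dst = LOCR %a, %b, 14, 8   becomes   %dst = LOCR %b, %a, 14, 6
+    // where 6 == (8 ^ 14) selects the inverted condition.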
+ unsigned CCValid = WorkingMI.getOperand(3).getImm();
+ unsigned CCMask = WorkingMI.getOperand(4).getImm();
+ WorkingMI.getOperand(4).setImm(CCMask ^ CCValid);
+ return TargetInstrInfo::commuteInstructionImpl(WorkingMI, /*NewMI=*/false,
+ OpIdx1, OpIdx2);
+ }
+ default:
+ return TargetInstrInfo::commuteInstructionImpl(MI, NewMI, OpIdx1, OpIdx2);
+ }
+}
+
// If MI is a simple load or store for a frame object, return the register
// it loads or stores and set FrameIndex to the index of the frame object.
// Return 0 otherwise.
removeIPMBasedCompare(Compare, SrcReg, MRI, &RI);
}
-// If Opcode is a move that has a conditional variant, return that variant,
-// otherwise return 0.
-static unsigned getConditionalMove(unsigned Opcode) {
- switch (Opcode) {
- case SystemZ::LR: return SystemZ::LOCR;
- case SystemZ::LGR: return SystemZ::LOCGR;
- default: return 0;
+
+bool SystemZInstrInfo::canInsertSelect(const MachineBasicBlock &MBB,
+ ArrayRef<MachineOperand> Pred,
+ unsigned TrueReg, unsigned FalseReg,
+ int &CondCycles, int &TrueCycles,
+ int &FalseCycles) const {
+ // Not all subtargets have LOCR instructions.
+ if (!STI.hasLoadStoreOnCond())
+ return false;
+ if (Pred.size() != 2)
+ return false;
+
+ // Check register classes.
+ const MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
+ const TargetRegisterClass *RC =
+ RI.getCommonSubClass(MRI.getRegClass(TrueReg), MRI.getRegClass(FalseReg));
+ if (!RC)
+ return false;
+
+  // We have LOCR instructions for 32- and 64-bit general-purpose registers.
+ if ((STI.hasLoadStoreOnCond2() &&
+ SystemZ::GRX32BitRegClass.hasSubClassEq(RC)) ||
+ SystemZ::GR32BitRegClass.hasSubClassEq(RC) ||
+ SystemZ::GR64BitRegClass.hasSubClassEq(RC)) {
+ CondCycles = 2;
+ TrueCycles = 2;
+ FalseCycles = 2;
+ return true;
}
+
+ // Can't do anything else.
+ return false;
}
-static unsigned getConditionalLoadImmediate(unsigned Opcode) {
- switch (Opcode) {
- case SystemZ::LHI: return SystemZ::LOCHI;
- case SystemZ::LGHI: return SystemZ::LOCGHI;
- default: return 0;
+void SystemZInstrInfo::insertSelect(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I,
+ const DebugLoc &DL, unsigned DstReg,
+ ArrayRef<MachineOperand> Pred,
+ unsigned TrueReg,
+ unsigned FalseReg) const {
+ MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
+ const TargetRegisterClass *RC = MRI.getRegClass(DstReg);
+
+ assert(Pred.size() == 2 && "Invalid condition");
+ unsigned CCValid = Pred[0].getImm();
+ unsigned CCMask = Pred[1].getImm();
+
+ unsigned Opc;
+ if (SystemZ::GRX32BitRegClass.hasSubClassEq(RC)) {
+ if (STI.hasLoadStoreOnCond2())
+ Opc = SystemZ::LOCRMux;
+ else {
+ Opc = SystemZ::LOCR;
+ MRI.constrainRegClass(DstReg, &SystemZ::GR32BitRegClass);
+ }
+ } else if (SystemZ::GR64BitRegClass.hasSubClassEq(RC))
+ Opc = SystemZ::LOCGR;
+ else
+ llvm_unreachable("Invalid register class");
+
+ BuildMI(MBB, I, DL, get(Opc), DstReg)
+ .addReg(FalseReg).addReg(TrueReg)
+ .addImm(CCValid).addImm(CCMask);
+}
+
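+// Fold an immediate loaded by LHI/LGHI/LHIMux into a conditional move that
+// uses it.  A sketch of the rewrite (register names illustrative):
+//   %r3 = LHIMux 42
+//   %r2 = LOCRMux %r2, %r3, CCValid, CCMask
+// becomes
+//   %r2 = LOCHIMux %r2, 42, CCValid, CCMask
+// (commuting the LOCRMux first when the register appears as the other
+// operand).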
+bool SystemZInstrInfo::FoldImmediate(MachineInstr &UseMI, MachineInstr &DefMI,
+ unsigned Reg,
+ MachineRegisterInfo *MRI) const {
+ unsigned DefOpc = DefMI.getOpcode();
+ if (DefOpc != SystemZ::LHIMux && DefOpc != SystemZ::LHI &&
+ DefOpc != SystemZ::LGHI)
+ return false;
+ if (DefMI.getOperand(0).getReg() != Reg)
+ return false;
+ int32_t ImmVal = (int32_t)DefMI.getOperand(1).getImm();
+
+ unsigned UseOpc = UseMI.getOpcode();
+ unsigned NewUseOpc;
+ unsigned UseIdx;
+ int CommuteIdx = -1;
+ switch (UseOpc) {
+ case SystemZ::LOCRMux:
+ if (!STI.hasLoadStoreOnCond2())
+ return false;
+ NewUseOpc = SystemZ::LOCHIMux;
+ if (UseMI.getOperand(2).getReg() == Reg)
+ UseIdx = 2;
+ else if (UseMI.getOperand(1).getReg() == Reg)
+ UseIdx = 2, CommuteIdx = 1;
+ else
+ return false;
+ break;
+ case SystemZ::LOCGR:
+ if (!STI.hasLoadStoreOnCond2())
+ return false;
+ NewUseOpc = SystemZ::LOCGHI;
+ if (UseMI.getOperand(2).getReg() == Reg)
+ UseIdx = 2;
+ else if (UseMI.getOperand(1).getReg() == Reg)
+ UseIdx = 2, CommuteIdx = 1;
+ else
+ return false;
+ break;
+ default:
+ return false;
}
+
+ if (CommuteIdx != -1)
+ if (!commuteInstruction(UseMI, false, CommuteIdx, UseIdx))
+ return false;
+
+ bool DeleteDef = MRI->hasOneNonDBGUse(Reg);
+ UseMI.setDesc(get(NewUseOpc));
+ UseMI.getOperand(UseIdx).ChangeToImmediate(ImmVal);
+ if (DeleteDef)
+ DefMI.eraseFromParent();
+
+ return true;
}
bool SystemZInstrInfo::isPredicable(MachineInstr &MI) const {
unsigned Opcode = MI.getOpcode();
- if (STI.hasLoadStoreOnCond() && getConditionalMove(Opcode))
- return true;
- if (STI.hasLoadStoreOnCond2() && getConditionalLoadImmediate(Opcode))
- return true;
if (Opcode == SystemZ::Return ||
Opcode == SystemZ::Trap ||
Opcode == SystemZ::CallJG ||
unsigned CCMask = Pred[1].getImm();
assert(CCMask > 0 && CCMask < 15 && "Invalid predicate");
unsigned Opcode = MI.getOpcode();
- if (STI.hasLoadStoreOnCond()) {
- if (unsigned CondOpcode = getConditionalMove(Opcode)) {
- MI.setDesc(get(CondOpcode));
- MachineInstrBuilder(*MI.getParent()->getParent(), MI)
- .addImm(CCValid)
- .addImm(CCMask)
- .addReg(SystemZ::CC, RegState::Implicit);
- return true;
- }
- }
- if (STI.hasLoadStoreOnCond2()) {
- if (unsigned CondOpcode = getConditionalLoadImmediate(Opcode)) {
- MI.setDesc(get(CondOpcode));
- MachineInstrBuilder(*MI.getParent()->getParent(), MI)
- .addImm(CCValid)
- .addImm(CCMask)
- .addReg(SystemZ::CC, RegState::Implicit);
- return true;
- }
- }
if (Opcode == SystemZ::Trap) {
MI.setDesc(get(SystemZ::CondTrap));
MachineInstrBuilder(*MI.getParent()->getParent(), MI)
expandRXYPseudo(MI, SystemZ::L, SystemZ::LFH);
return true;
+ case SystemZ::LOCMux:
+ expandLOCPseudo(MI, SystemZ::LOC, SystemZ::LOCFH);
+ return true;
+
+ case SystemZ::LOCHIMux:
+ expandLOCPseudo(MI, SystemZ::LOCHI, SystemZ::LOCHHI);
+ return true;
+
+ case SystemZ::LOCRMux:
+ expandLOCRPseudo(MI, SystemZ::LOCR, SystemZ::LOCFHR);
+ return true;
+
case SystemZ::STCMux:
expandRXYPseudo(MI, SystemZ::STC, SystemZ::STCH);
return true;
expandRXYPseudo(MI, SystemZ::ST, SystemZ::STFH);
return true;
+ case SystemZ::STOCMux:
+ expandLOCPseudo(MI, SystemZ::STOC, SystemZ::STOCFH);
+ return true;
+
case SystemZ::LHIMux:
expandRIPseudo(MI, SystemZ::LHI, SystemZ::IIHF, true);
return true;
unsigned LowOpcodeK, unsigned HighOpcode) const;
void expandRXYPseudo(MachineInstr &MI, unsigned LowOpcode,
unsigned HighOpcode) const;
+ void expandLOCPseudo(MachineInstr &MI, unsigned LowOpcode,
+ unsigned HighOpcode) const;
+ void expandLOCRPseudo(MachineInstr &MI, unsigned LowOpcode,
+ unsigned HighOpcode) const;
void expandZExtPseudo(MachineInstr &MI, unsigned LowOpcode,
unsigned Size) const;
void expandLoadStackGuard(MachineInstr *MI) const;
const DebugLoc &DL, unsigned DestReg, unsigned SrcReg,
unsigned LowLowOpcode, unsigned Size, bool KillSrc) const;
virtual void anchor();
-
+
+protected:
+ /// Commutes the operands in the given instruction by changing the operands
+ /// order and/or changing the instruction's opcode and/or the immediate value
+ /// operand.
+ ///
+ /// The arguments 'CommuteOpIdx1' and 'CommuteOpIdx2' specify the operands
+ /// to be commuted.
+ ///
+ /// Do not call this method for a non-commutable instruction or
+ /// non-commutable operands.
+ /// Even though the instruction is commutable, the method may still
+  /// fail to commute the operands; a null pointer is returned in such cases.
+ MachineInstr *commuteInstructionImpl(MachineInstr &MI, bool NewMI,
+ unsigned CommuteOpIdx1,
+ unsigned CommuteOpIdx2) const override;
+
public:
explicit SystemZInstrInfo(SystemZSubtarget &STI);
bool optimizeCompareInstr(MachineInstr &CmpInstr, unsigned SrcReg,
unsigned SrcReg2, int Mask, int Value,
const MachineRegisterInfo *MRI) const override;
+ bool canInsertSelect(const MachineBasicBlock&, ArrayRef<MachineOperand> Cond,
+ unsigned, unsigned, int&, int&, int&) const override;
+ void insertSelect(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
+ const DebugLoc &DL, unsigned DstReg,
+ ArrayRef<MachineOperand> Cond, unsigned TrueReg,
+ unsigned FalseReg) const override;
+ bool FoldImmediate(MachineInstr &UseMI, MachineInstr &DefMI, unsigned Reg,
+ MachineRegisterInfo *MRI) const override;
bool isPredicable(MachineInstr &MI) const override;
bool isProfitableToIfCvt(MachineBasicBlock &MBB, unsigned NumCycles,
unsigned ExtraPredCycles,
def Select32 : SelectWrapper<GR32>;
def Select64 : SelectWrapper<GR64>;
-// We don't define 32-bit Mux stores because the low-only STOC should
-// always be used if possible.
+// We don't define 32-bit Mux stores if we don't have STOCFH, because the
+// low-only STOC should then always be used if possible.
defm CondStore8Mux : CondStores<GRX32, nonvolatile_truncstorei8,
nonvolatile_anyextloadi8, bdxaddr20only>,
Requires<[FeatureHighWord]>;
defm CondStore16Mux : CondStores<GRX32, nonvolatile_truncstorei16,
nonvolatile_anyextloadi16, bdxaddr20only>,
Requires<[FeatureHighWord]>;
+defm CondStore32Mux : CondStores<GRX32, nonvolatile_store,
+ nonvolatile_load, bdxaddr20only>,
+ Requires<[FeatureLoadStoreOnCond2]>;
defm CondStore8 : CondStores<GR32, nonvolatile_truncstorei8,
nonvolatile_anyextloadi8, bdxaddr20only>;
defm CondStore16 : CondStores<GR32, nonvolatile_truncstorei16,
//===----------------------------------------------------------------------===//
let Predicates = [FeatureLoadStoreOnCond2], Uses = [CC] in {
- // Load immediate on condition. Created by if-conversion.
- defm LOCHI : CondUnaryRIEPair<"lochi", 0xEC42, GR32, imm32sx16>;
- defm LOCGHI : CondUnaryRIEPair<"locghi", 0xEC46, GR64, imm64sx16>;
+ // Load immediate on condition. Matched via DAG pattern and created
+ // by the PeepholeOptimizer via FoldImmediate.
+ let hasSideEffects = 0 in {
+ // Expands to LOCHI or LOCHHI, depending on the choice of register.
+ def LOCHIMux : CondBinaryRIEPseudo<GRX32, imm32sx16>;
+ defm LOCHHI : CondBinaryRIEPair<"lochhi", 0xEC4E, GRH32, imm32sx16>;
+ defm LOCHI : CondBinaryRIEPair<"lochi", 0xEC42, GR32, imm32sx16>;
+ defm LOCGHI : CondBinaryRIEPair<"locghi", 0xEC46, GR64, imm64sx16>;
+ }
+
+ // Move register on condition. Expanded from Select* pseudos and
+ // created by early if-conversion.
+ let hasSideEffects = 0, isCommutable = 1 in {
+ // Expands to LOCR or LOCFHR or a branch-and-move sequence,
+ // depending on the choice of registers.
+ def LOCRMux : CondBinaryRRFPseudo<GRX32, GRX32>;
+ defm LOCFHR : CondBinaryRRFPair<"locfhr", 0xB9E0, GRH32, GRH32>;
+ }
+
+ // Load on condition. Matched via DAG pattern.
+ // Expands to LOC or LOCFH, depending on the choice of register.
+ def LOCMux : CondUnaryRSYPseudo<nonvolatile_load, GRX32, 4>;
+ defm LOCFH : CondUnaryRSYPair<"locfh", 0xEBE0, nonvolatile_load, GRH32, 4>;
+
+ // Store on condition. Expanded from CondStore* pseudos.
+ // Expands to STOC or STOCFH, depending on the choice of register.
+ def STOCMux : CondStoreRSYPseudo<GRX32, 4>;
+ defm STOCFH : CondStoreRSYPair<"stocfh", 0xEBE1, GRH32, 4>;
// Define AsmParser extended mnemonics for each general condition-code mask.
foreach V = [ "E", "NE", "H", "NH", "L", "NL", "HE", "NHE", "LE", "NLE",
"Z", "NZ", "P", "NP", "M", "NM", "LH", "NLH", "O", "NO" ] in {
- def LOCHIAsm#V : FixedCondUnaryRIE<CV<V>, "lochi", 0xEC42, GR32,
- imm32sx16>;
- def LOCGHIAsm#V : FixedCondUnaryRIE<CV<V>, "locghi", 0xEC46, GR64,
- imm64sx16>;
+ def LOCHIAsm#V : FixedCondBinaryRIE<CV<V>, "lochi", 0xEC42, GR32,
+ imm32sx16>;
+ def LOCGHIAsm#V : FixedCondBinaryRIE<CV<V>, "locghi", 0xEC46, GR64,
+ imm64sx16>;
+ def LOCHHIAsm#V : FixedCondBinaryRIE<CV<V>, "lochhi", 0xEC4E, GRH32,
+ imm32sx16>;
+ def LOCFHRAsm#V : FixedCondBinaryRRF<CV<V>, "locfhr", 0xB9E0, GRH32, GRH32>;
+ def LOCFHAsm#V : FixedCondUnaryRSY<CV<V>, "locfh", 0xEBE0, GRH32, 4>;
+ def STOCFHAsm#V : FixedCondStoreRSY<CV<V>, "stocfh", 0xEBE1, GRH32, 4>;
}
}
let Predicates = [FeatureLoadStoreOnCond], Uses = [CC] in {
- // Move register on condition. Created by if-conversion.
- defm LOCR : CondUnaryRRFPair<"locr", 0xB9F2, GR32, GR32>;
- defm LOCGR : CondUnaryRRFPair<"locgr", 0xB9E2, GR64, GR64>;
+ // Move register on condition. Expanded from Select* pseudos and
+ // created by early if-conversion.
+ let hasSideEffects = 0, isCommutable = 1 in {
+ defm LOCR : CondBinaryRRFPair<"locr", 0xB9F2, GR32, GR32>;
+ defm LOCGR : CondBinaryRRFPair<"locgr", 0xB9E2, GR64, GR64>;
+ }
// Load on condition. Matched via DAG pattern.
defm LOC : CondUnaryRSYPair<"loc", 0xEBF2, nonvolatile_load, GR32, 4>;
// Define AsmParser extended mnemonics for each general condition-code mask.
foreach V = [ "E", "NE", "H", "NH", "L", "NL", "HE", "NHE", "LE", "NLE",
"Z", "NZ", "P", "NP", "M", "NM", "LH", "NLH", "O", "NO" ] in {
- def LOCRAsm#V : FixedCondUnaryRRF<CV<V>, "locr", 0xB9F2, GR32, GR32>;
- def LOCGRAsm#V : FixedCondUnaryRRF<CV<V>, "locgr", 0xB9E2, GR64, GR64>;
+ def LOCRAsm#V : FixedCondBinaryRRF<CV<V>, "locr", 0xB9F2, GR32, GR32>;
+ def LOCGRAsm#V : FixedCondBinaryRRF<CV<V>, "locgr", 0xB9E2, GR64, GR64>;
def LOCAsm#V : FixedCondUnaryRSY<CV<V>, "loc", 0xEBF2, GR32, 4>;
def LOCGAsm#V : FixedCondUnaryRSY<CV<V>, "locg", 0xEBE2, GR64, 8>;
def STOCAsm#V : FixedCondStoreRSY<CV<V>, "stoc", 0xEBF3, GR32, 4>;
// Division and remainder
//===----------------------------------------------------------------------===//
-// Division and remainder, from registers.
-def DSGFR : BinaryRRE<"dsgfr", 0xB91D, z_sdivrem32, GR128, GR32>;
-def DSGR : BinaryRRE<"dsgr", 0xB90D, z_sdivrem64, GR128, GR64>;
-def DLR : BinaryRRE<"dlr", 0xB997, z_udivrem32, GR128, GR32>;
-def DLGR : BinaryRRE<"dlgr", 0xB987, z_udivrem64, GR128, GR64>;
+let hasSideEffects = 1 in { // Do not speculatively execute.
+ // Division and remainder, from registers.
+ def DSGFR : BinaryRRE<"dsgfr", 0xB91D, z_sdivrem32, GR128, GR32>;
+ def DSGR : BinaryRRE<"dsgr", 0xB90D, z_sdivrem64, GR128, GR64>;
+ def DLR : BinaryRRE<"dlr", 0xB997, z_udivrem32, GR128, GR32>;
+ def DLGR : BinaryRRE<"dlgr", 0xB987, z_udivrem64, GR128, GR64>;
-// Division and remainder, from memory.
-def DSGF : BinaryRXY<"dsgf", 0xE31D, z_sdivrem32, GR128, load, 4>;
-def DSG : BinaryRXY<"dsg", 0xE30D, z_sdivrem64, GR128, load, 8>;
-def DL : BinaryRXY<"dl", 0xE397, z_udivrem32, GR128, load, 4>;
-def DLG : BinaryRXY<"dlg", 0xE387, z_udivrem64, GR128, load, 8>;
+ // Division and remainder, from memory.
+ def DSGF : BinaryRXY<"dsgf", 0xE31D, z_sdivrem32, GR128, load, 4>;
+ def DSG : BinaryRXY<"dsg", 0xE30D, z_sdivrem64, GR128, load, 8>;
+ def DL : BinaryRXY<"dl", 0xE397, z_udivrem32, GR128, load, 4>;
+ def DLG : BinaryRXY<"dlg", 0xE387, z_udivrem64, GR128, load, 8>;
+}
//===----------------------------------------------------------------------===//
// Shifts
let PostRAScheduler = 1;
// Extra cycles for a mispredicted branch.
- let MispredictPenalty = 8;
+ let MispredictPenalty = 20;
}
let SchedModel = Z13Model in {
def : InstRW<[FXa], (instregex "CondStore16(Inv)?$")>;
def : InstRW<[FXa], (instregex "CondStore16Mux(Inv)?$")>;
def : InstRW<[FXa], (instregex "CondStore32(Inv)?$")>;
+def : InstRW<[FXa], (instregex "CondStore32Mux(Inv)?$")>;
def : InstRW<[FXa], (instregex "CondStore64(Inv)?$")>;
def : InstRW<[FXa], (instregex "CondStore8(Inv)?$")>;
def : InstRW<[FXa], (instregex "CondStore8Mux(Inv)?$")>;
// Conditional move instructions
//===----------------------------------------------------------------------===//
-def : InstRW<[FXa, Lat2], (instregex "LOC(G)?R(Asm.*)?$")>;
-def : InstRW<[FXa, Lat2], (instregex "LOC(G)?HI(Asm.*)?$")>;
-def : InstRW<[FXa, LSU, Lat6], (instregex "LOC(G)?(Asm.*)?$")>;
-def : InstRW<[FXb, LSU, Lat5], (instregex "STOC(G)?(Asm.*)?$")>;
+def : InstRW<[FXa, Lat2], (instregex "LOCRMux$")>;
+def : InstRW<[FXa, Lat2], (instregex "LOC(G|FH)?R(Asm.*)?$")>;
+def : InstRW<[FXa, Lat2], (instregex "LOC(G|H)?HI(Asm.*)?$")>;
+def : InstRW<[FXa, LSU, Lat6], (instregex "LOC(G|FH|Mux)?(Asm.*)?$")>;
+def : InstRW<[FXb, LSU, Lat5], (instregex "STOC(G|FH|Mux)?(Asm.*)?$")>;
//===----------------------------------------------------------------------===//
// Sign extensions
let PostRAScheduler = 1;
// Extra cycles for a mispredicted branch.
- let MispredictPenalty = 8;
+ let MispredictPenalty = 16;
}
let SchedModel = Z196Model in {
//===----------------------------------------------------------------------===//
def : InstRW<[FXU, Lat2, EndGroup], (instregex "LOC(G)?R(Asm.*)?$")>;
-def : InstRW<[FXU, Lat2, EndGroup], (instregex "LOC(G)?HI(Asm.*)?$")>;
def : InstRW<[FXU, LSU, Lat6, EndGroup], (instregex "LOC(G)?(Asm.*)?$")>;
def : InstRW<[FXU, LSU, Lat5, EndGroup], (instregex "STOC(G)?(Asm.*)?$")>;
let PostRAScheduler = 1;
// Extra cycles for a mispredicted branch.
- let MispredictPenalty = 8;
+ let MispredictPenalty = 16;
}
let SchedModel = ZEC12Model in {
//===----------------------------------------------------------------------===//
def : InstRW<[FXU, Lat2], (instregex "LOC(G)?R(Asm.*)?$")>;
-def : InstRW<[FXU, Lat2], (instregex "LOC(G)?HI(Asm.*)?$")>;
def : InstRW<[FXU, LSU, Lat6], (instregex "LOC(G)?(Asm.*)?$")>;
def : InstRW<[FXU, LSU, Lat5], (instregex "STOC(G)?(Asm.*)?$")>;
// This is important for reducing register pressure in vector code.
bool useAA() const override { return true; }
+ // Always enable the early if-conversion pass.
+ bool enableEarlyIfConversion() const override { return true; }
+
// Automatically generated by tblgen.
void ParseSubtargetFeatures(StringRef CPU, StringRef FS);
void addIRPasses() override;
bool addInstSelector() override;
+ bool addILPOpts() override;
void addPreSched2() override;
void addPreEmitPass() override;
};
return false;
}
+bool SystemZPassConfig::addILPOpts() {
+ addPass(&EarlyIfConverterID);
+ return true;
+}
+
void SystemZPassConfig::addPreSched2() {
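+  // LOCRMux pseudos must be expanded (possibly into new basic blocks)
+  // before the post-RA scheduler and the if-converter run; this also holds
+  // at -O0, so the pass is added unconditionally.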
+ addPass(createSystemZExpandPseudoPass(getSystemZTargetMachine()));
+
if (getOptLevel() != CodeGenOpt::None)
addPass(&IfConverterID);
}
+++ /dev/null
-; Test LOCHI/LOCGHI
-;
-; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s
-
-; CHECK-LABEL: bar1:
-; CHECK: lhi [[REG:%r[0-5]]], 42
-; CHECK: chi %r2, 0
-; CHECK: lochie [[REG]], 0
-define signext i32 @bar1(i32 signext %x) {
- %cmp = icmp ne i32 %x, 0
- %.x = select i1 %cmp, i32 42, i32 0
- ret i32 %.x
-}
-
-; CHECK-LABEL: bar2:
-; CHECK: ltgr [[REG:%r[0-5]]], %r2
-; CHECK: lghi %r2, 42
-; CHECK: locghie %r2, 0
-define signext i64 @bar2(i64 signext %x) {
- %cmp = icmp ne i64 %x, 0
- %.x = select i1 %cmp, i64 42, i64 0
- ret i64 %.x
-}
;
; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z196 | FileCheck %s
+; Run the test again to make sure it still works the same even
+; in the presence of the load-store-on-condition-2 facility.
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s
+
declare i32 @foo(i32 *)
; Test the simple case.
--- /dev/null
+; Test LOCFH. See comments in asm-18.ll about testing high-word operations.
+;
+; RUN: llc < %s -verify-machineinstrs -mtriple=s390x-linux-gnu -mcpu=z13 \
+; RUN: -no-integrated-as | FileCheck %s
+
+declare void @foo(i32 *)
+
+; Test the simple case.
+define void @f1(i32 *%ptr, i32 %limit) {
+; CHECK-LABEL: f1:
+; CHECK-DAG: stepa [[REG:%r[0-5]]]
+; CHECK-DAG: clfi %r3, 42
+; CHECK: locfhhe [[REG]], 0(%r2)
+; CHECK: br %r14
+ %easy = call i32 asm "stepa $0", "=h"()
+ %cond = icmp ult i32 %limit, 42
+ %other = load i32, i32 *%ptr
+ %res = select i1 %cond, i32 %easy, i32 %other
+ call void asm sideeffect "stepb $0", "h"(i32 %res)
+ ret void
+}
+
+; ...and again with the operands swapped.
+define void @f2(i32 *%ptr, i32 %limit) {
+; CHECK-LABEL: f2:
+; CHECK-DAG: stepa [[REG:%r[0-5]]]
+; CHECK-DAG: clfi %r3, 42
+; CHECK: locfhl [[REG]], 0(%r2)
+; CHECK: br %r14
+ %easy = call i32 asm "stepa $0", "=h"()
+ %cond = icmp ult i32 %limit, 42
+ %other = load i32, i32 *%ptr
+ %res = select i1 %cond, i32 %other, i32 %easy
+ call void asm sideeffect "stepb $0", "h"(i32 %res)
+ ret void
+}
+
+; Check the high end of the aligned LOC range.
+define void @f3(i32 *%base, i32 %limit) {
+; CHECK-LABEL: f3:
+; CHECK-DAG: stepa [[REG:%r[0-5]]]
+; CHECK-DAG: clfi %r3, 42
+; CHECK: locfhhe [[REG]], 524284(%r2)
+; CHECK: br %r14
+ %easy = call i32 asm "stepa $0", "=h"()
+ %ptr = getelementptr i32, i32 *%base, i64 131071
+ %cond = icmp ult i32 %limit, 42
+ %other = load i32, i32 *%ptr
+ %res = select i1 %cond, i32 %easy, i32 %other
+ call void asm sideeffect "stepb $0", "h"(i32 %res)
+ ret void
+}
+
+; Check the next word up. Other sequences besides this one would be OK.
+define void @f4(i32 *%base, i32 %limit) {
+; CHECK-LABEL: f4:
+; CHECK-DAG: stepa [[REG:%r[0-5]]]
+; CHECK-DAG: agfi %r2, 524288
+; CHECK-DAG: clfi %r3, 42
+; CHECK: locfhhe [[REG]], 0(%r2)
+; CHECK: br %r14
+ %easy = call i32 asm "stepa $0", "=h"()
+ %ptr = getelementptr i32, i32 *%base, i64 131072
+ %cond = icmp ult i32 %limit, 42
+ %other = load i32, i32 *%ptr
+ %res = select i1 %cond, i32 %easy, i32 %other
+ call void asm sideeffect "stepb $0", "h"(i32 %res)
+ ret void
+}
+
+; Check the low end of the LOC range.
+define void @f5(i32 *%base, i32 %limit) {
+; CHECK-LABEL: f5:
+; CHECK-DAG: stepa [[REG:%r[0-5]]]
+; CHECK-DAG: clfi %r3, 42
+; CHECK: locfhhe [[REG]], -524288(%r2)
+; CHECK: br %r14
+ %easy = call i32 asm "stepa $0", "=h"()
+ %ptr = getelementptr i32, i32 *%base, i64 -131072
+ %cond = icmp ult i32 %limit, 42
+ %other = load i32, i32 *%ptr
+ %res = select i1 %cond, i32 %easy, i32 %other
+ call void asm sideeffect "stepb $0", "h"(i32 %res)
+ ret void
+}
+
+; Check the next word down, with the same comments as f4.
+define void @f6(i32 *%base, i32 %limit) {
+; CHECK-LABEL: f6:
+; CHECK-DAG: stepa [[REG:%r[0-5]]]
+; CHECK-DAG: agfi %r2, -524292
+; CHECK-DAG: clfi %r3, 42
+; CHECK: locfhhe [[REG]], 0(%r2)
+; CHECK: br %r14
+ %easy = call i32 asm "stepa $0", "=h"()
+ %ptr = getelementptr i32, i32 *%base, i64 -131073
+ %cond = icmp ult i32 %limit, 42
+ %other = load i32, i32 *%ptr
+ %res = select i1 %cond, i32 %easy, i32 %other
+ call void asm sideeffect "stepb $0", "h"(i32 %res)
+ ret void
+}
+
+; Try a frame index base.
+define void @f7(i32 %alt, i32 %limit) {
+; CHECK-LABEL: f7:
+; CHECK: brasl %r14, foo@PLT
+; CHECK: stepa [[REG:%r[0-5]]]
+; CHECK: locfhhe [[REG]], {{[0-9]+}}(%r15)
+; CHECK: br %r14
+ %ptr = alloca i32
+ call void @foo(i32 *%ptr)
+ %easy = call i32 asm "stepa $0", "=h"()
+ %cond = icmp ult i32 %limit, 42
+ %other = load i32, i32 *%ptr
+ %res = select i1 %cond, i32 %easy, i32 %other
+ call void asm sideeffect "stepb $0", "h"(i32 %res)
+ ret void
+}
+
+; Try a case when an index is involved.
+define void @f8(i32 %limit, i64 %base, i64 %index) {
+; CHECK-LABEL: f8:
+; CHECK-DAG: stepa [[REG:%r[0-5]]]
+; CHECK-DAG: clfi %r2, 42
+; CHECK: locfhhe [[REG]], 0({{%r[1-5]}})
+; CHECK: br %r14
+ %easy = call i32 asm "stepa $0", "=h"()
+ %add = add i64 %base, %index
+ %ptr = inttoptr i64 %add to i32 *
+ %cond = icmp ult i32 %limit, 42
+ %other = load i32, i32 *%ptr
+ %res = select i1 %cond, i32 %easy, i32 %other
+ call void asm sideeffect "stepb $0", "h"(i32 %res)
+ ret void
+}
+
+; Test that conditionally-executed loads do not use LOC, since it is allowed
+; to trap even when the condition is false.
+define void @f9(i32 %limit, i32 *%ptr) {
+; CHECK-LABEL: f9:
+; CHECK-NOT: loc
+; CHECK: lfh
+; CHECK: br %r14
+entry:
+ %easy = call i32 asm "stepa $0", "=h"()
+ %cmp = icmp ule i32 %easy, %limit
+ br i1 %cmp, label %load, label %exit
+
+load:
+ %other = load i32, i32 *%ptr
+ br label %exit
+
+exit:
+ %res = phi i32 [ %easy, %entry ], [ %other, %load ]
+ call void asm sideeffect "stepb $0", "h"(i32 %res)
+ ret void
+}
; Test LOCR and LOCGR.
;
-; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z196 | FileCheck %s
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z196 -verify-machineinstrs | FileCheck %s
+;
+; Run the test again to make sure it still works the same even
+; in the presence of the load-store-on-condition-2 facility.
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 -verify-machineinstrs | FileCheck %s
; Test LOCR.
define i32 @f1(i32 %a, i32 %b, i32 %limit) {
%res = select i1 %cond, i64 %a, i64 %b
ret i64 %res
}
+
+; Check that we also get LOCR as a result of early if-conversion.
+define i32 @f5(i32 %a, i32 %b, i32 %limit) {
+; CHECK-LABEL: f5:
+; CHECK: clfi %r4, 41
+; CHECK: locrh %r2, %r3
+; CHECK: br %r14
+entry:
+ %cond = icmp ult i32 %limit, 42
+ br i1 %cond, label %if.then, label %return
+
+if.then:
+ br label %return
+
+return:
+ %res = phi i32 [ %a, %if.then ], [ %b, %entry ]
+ ret i32 %res
+}
+
+; ... and likewise for LOCGR.
+define i64 @f6(i64 %a, i64 %b, i64 %limit) {
+; CHECK-LABEL: f6:
+; CHECK: clgfi %r4, 41
+; CHECK: locgrh %r2, %r3
+; CHECK: br %r14
+entry:
+ %cond = icmp ult i64 %limit, 42
+ br i1 %cond, label %if.then, label %return
+
+if.then:
+ br label %return
+
+return:
+ %res = phi i64 [ %a, %if.then ], [ %b, %entry ]
+ ret i64 %res
+}
+
+; Check that inverting the condition works as well.
+define i32 @f7(i32 %a, i32 %b, i32 %limit) {
+; CHECK-LABEL: f7:
+; CHECK: clfi %r4, 41
+; CHECK: locrle %r2, %r3
+; CHECK: br %r14
+entry:
+ %cond = icmp ult i32 %limit, 42
+ br i1 %cond, label %if.then, label %return
+
+if.then:
+ br label %return
+
+return:
+ %res = phi i32 [ %b, %if.then ], [ %a, %entry ]
+ ret i32 %res
+}
+
+; ... and likewise for LOCGR.
+define i64 @f8(i64 %a, i64 %b, i64 %limit) {
+; CHECK-LABEL: f8:
+; CHECK: clgfi %r4, 41
+; CHECK: locgrle %r2, %r3
+; CHECK: br %r14
+entry:
+ %cond = icmp ult i64 %limit, 42
+ br i1 %cond, label %if.then, label %return
+
+if.then:
+ br label %return
+
+return:
+ %res = phi i64 [ %b, %if.then ], [ %a, %entry ]
+ ret i64 %res
+}
+
--- /dev/null
+; Test LOCHI and LOCGHI.
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 -verify-machineinstrs | FileCheck %s
+
+define i32 @f1(i32 %x) {
+; CHECK-LABEL: f1:
+; CHECK: lhi [[REG:%r[0-5]]], 0
+; CHECK: chi %r2, 0
+; CHECK: lochilh [[REG]], 42
+; CHECK: br %r14
+ %cond = icmp ne i32 %x, 0
+ %res = select i1 %cond, i32 42, i32 0
+ ret i32 %res
+}
+
+define i32 @f2(i32 %x, i32 %y) {
+; CHECK-LABEL: f2:
+; CHECK: chi %r2, 0
+; CHECK: lochilh %r3, 42
+; CHECK: br %r14
+ %cond = icmp ne i32 %x, 0
+ %res = select i1 %cond, i32 42, i32 %y
+ ret i32 %res
+}
+
+define i32 @f3(i32 %x, i32 %y) {
+; CHECK-LABEL: f3:
+; CHECK: chi %r2, 0
+; CHECK: lochie %r3, 42
+; CHECK: br %r14
+ %cond = icmp ne i32 %x, 0
+ %res = select i1 %cond, i32 %y, i32 42
+ ret i32 %res
+}
+
+define i64 @f4(i64 %x) {
+; CHECK-LABEL: f4:
+; CHECK: lghi [[REG:%r[0-5]]], 0
+; CHECK: cghi %r2, 0
+; CHECK: locghilh [[REG]], 42
+; CHECK: br %r14
+ %cond = icmp ne i64 %x, 0
+ %res = select i1 %cond, i64 42, i64 0
+ ret i64 %res
+}
+
+define i64 @f5(i64 %x, i64 %y) {
+; CHECK-LABEL: f5:
+; CHECK: cghi %r2, 0
+; CHECK: locghilh %r3, 42
+; CHECK: br %r14
+ %cond = icmp ne i64 %x, 0
+ %res = select i1 %cond, i64 42, i64 %y
+ ret i64 %res
+}
+
+define i64 @f6(i64 %x, i64 %y) {
+; CHECK-LABEL: f6:
+; CHECK: cghi %r2, 0
+; CHECK: locghie %r3, 42
+; CHECK: br %r14
+ %cond = icmp ne i64 %x, 0
+ %res = select i1 %cond, i64 %y, i64 42
+ ret i64 %res
+}
+
+; Check that we also get LOCHI as a result of early if-conversion.
+define i32 @f7(i32 %x, i32 %y) {
+; CHECK-LABEL: f7:
+; CHECK: chi %r2, 0
+; CHECK: lochie %r3, 42
+; CHECK: br %r14
+entry:
+ %cond = icmp ne i32 %x, 0
+ br i1 %cond, label %if.then, label %return
+
+if.then:
+ br label %return
+
+return:
+ %res = phi i32 [ %y, %if.then ], [ 42, %entry ]
+ ret i32 %res
+}
+
+; ... and the same for LOCGHI.
+define i64 @f8(i64 %x, i64 %y) {
+; CHECK-LABEL: f8:
+; CHECK: cghi %r2, 0
+; CHECK: locghie %r3, 42
+; CHECK: br %r14
+entry:
+ %cond = icmp ne i64 %x, 0
+ br i1 %cond, label %if.then, label %return
+
+if.then:
+ br label %return
+
+return:
+ %res = phi i64 [ %y, %if.then ], [ 42, %entry ]
+ ret i64 %res
+}
+
+; Check that inverting the condition works as well.
+define i32 @f9(i32 %x, i32 %y) {
+; CHECK-LABEL: f9:
+; CHECK: chi %r2, 0
+; CHECK: lochilh %r3, 42
+; CHECK: br %r14
+entry:
+ %cond = icmp ne i32 %x, 0
+ br i1 %cond, label %if.then, label %return
+
+if.then:
+ br label %return
+
+return:
+ %res = phi i32 [ 42, %if.then ], [ %y, %entry ]
+ ret i32 %res
+}
+
+; ... and the same for LOCGHI.
+define i64 @f10(i64 %x, i64 %y) {
+; CHECK-LABEL: f10:
+; CHECK: cghi %r2, 0
+; CHECK: locghilh %r3, 42
+; CHECK: br %r14
+entry:
+ %cond = icmp ne i64 %x, 0
+ br i1 %cond, label %if.then, label %return
+
+if.then:
+ br label %return
+
+return:
+ %res = phi i64 [ 42, %if.then ], [ %y, %entry ]
+ ret i64 %res
+}
+
--- /dev/null
+; Test LOCFHR and LOCHHI.
+; See comments in asm-18.ll about testing high-word operations.
+;
+; RUN: llc < %s -verify-machineinstrs -mtriple=s390x-linux-gnu -mcpu=z13 \
+; RUN: -no-integrated-as | FileCheck %s
+
+define void @f1(i32 %limit) {
+; CHECK-LABEL: f1:
+; CHECK-DAG: stepa [[REG1:%r[0-5]]]
+; CHECK-DAG: stepb [[REG2:%r[0-5]]]
+; CHECK-DAG: clfi %r2, 42
+; CHECK: locfhrl [[REG2]], [[REG1]]
+; CHECK: stepc [[REG2]]
+; CHECK: br %r14
+ %a = call i32 asm sideeffect "stepa $0", "=h"()
+ %b = call i32 asm sideeffect "stepb $0", "=h"()
+ %cond = icmp ult i32 %limit, 42
+ %res = select i1 %cond, i32 %a, i32 %b
+ call void asm sideeffect "stepc $0", "h"(i32 %res)
+ ret void
+}
+
+; FIXME: We should commute the LOCRMux to save one move.
+define void @f2(i32 %limit) {
+; CHECK-LABEL: f2:
+; CHECK-DAG: stepa [[REG1:%r[0-5]]]
+; CHECK-DAG: stepb [[REG2:%r[0-5]]]
+; CHECK-DAG: clijhe %r2, 42,
+; CHECK: risblg [[REG2]], [[REG1]], 0, 159, 32
+; CHECK: risbhg [[REG1]], [[REG2]], 0, 159, 32
+; CHECK: stepc [[REG1]]
+; CHECK: br %r14
+ %dummy = call i32 asm sideeffect "dummy $0", "=h"()
+ %a = call i32 asm sideeffect "stepa $0", "=h"()
+ %b = call i32 asm sideeffect "stepb $0", "=r"()
+ %cond = icmp ult i32 %limit, 42
+ %res = select i1 %cond, i32 %a, i32 %b
+ call void asm sideeffect "stepc $0", "h"(i32 %res)
+ call void asm sideeffect "dummy $0", "h"(i32 %dummy)
+ ret void
+}
+
+define void @f3(i32 %limit) {
+; CHECK-LABEL: f3:
+; CHECK-DAG: stepa [[REG2:%r[0-5]]]
+; CHECK-DAG: stepb [[REG1:%r[0-5]]]
+; CHECK-DAG: clijhe %r2, 42,
+; CHECK: risbhg [[REG1]], [[REG2]], 0, 159, 32
+; CHECK: stepc [[REG1]]
+; CHECK: br %r14
+ %dummy = call i32 asm sideeffect "dummy $0", "=h"()
+ %a = call i32 asm sideeffect "stepa $0", "=r"()
+ %b = call i32 asm sideeffect "stepb $0", "=h"()
+ %cond = icmp ult i32 %limit, 42
+ %res = select i1 %cond, i32 %a, i32 %b
+ call void asm sideeffect "stepc $0", "h"(i32 %res)
+ call void asm sideeffect "dummy $0", "h"(i32 %dummy)
+ ret void
+}
+
+; FIXME: We should commute the LOCRMux to save one move.
+define void @f4(i32 %limit) {
+; CHECK-LABEL: f4:
+; CHECK-DAG: stepa [[REG1:%r[0-5]]]
+; CHECK-DAG: stepb [[REG2:%r[0-5]]]
+; CHECK-DAG: clijhe %r2, 42,
+; CHECK: risbhg [[REG2]], [[REG1]], 0, 159, 32
+; CHECK: risblg [[REG1]], [[REG2]], 0, 159, 32
+; CHECK: stepc [[REG1]]
+; CHECK: br %r14
+ %dummy = call i32 asm sideeffect "dummy $0", "=h"()
+ %a = call i32 asm sideeffect "stepa $0", "=r"()
+ %b = call i32 asm sideeffect "stepb $0", "=h"()
+ %cond = icmp ult i32 %limit, 42
+ %res = select i1 %cond, i32 %a, i32 %b
+ call void asm sideeffect "stepc $0", "r"(i32 %res)
+ call void asm sideeffect "dummy $0", "h"(i32 %dummy)
+ ret void
+}
+
+define void @f5(i32 %limit) {
+; CHECK-LABEL: f5:
+; CHECK-DAG: stepa [[REG2:%r[0-5]]]
+; CHECK-DAG: stepb [[REG1:%r[0-5]]]
+; CHECK-DAG: clijhe %r2, 42,
+; CHECK: risblg [[REG1]], [[REG2]], 0, 159, 32
+; CHECK: stepc [[REG1]]
+; CHECK: br %r14
+ %dummy = call i32 asm sideeffect "dummy $0", "=h"()
+ %a = call i32 asm sideeffect "stepa $0", "=h"()
+ %b = call i32 asm sideeffect "stepb $0", "=r"()
+ %cond = icmp ult i32 %limit, 42
+ %res = select i1 %cond, i32 %a, i32 %b
+ call void asm sideeffect "stepc $0", "r"(i32 %res)
+ call void asm sideeffect "dummy $0", "h"(i32 %dummy)
+ ret void
+}
+
+; Check that we also get LOCFHR as a result of early if-conversion.
+define void @f6(i32 %limit) {
+; CHECK-LABEL: f6:
+; CHECK-DAG: stepa [[REG1:%r[0-5]]]
+; CHECK-DAG: stepb [[REG2:%r[0-5]]]
+; CHECK-DAG: clfi %r2, 41
+; CHECK: locfhrle [[REG2]], [[REG1]]
+; CHECK: stepc [[REG2]]
+; CHECK: br %r14
+entry:
+ %a = call i32 asm sideeffect "stepa $0", "=h"()
+ %b = call i32 asm sideeffect "stepb $0", "=h"()
+ %cond = icmp ult i32 %limit, 42
+ br i1 %cond, label %if.then, label %return
+
+if.then:
+ br label %return
+
+return:
+ %res = phi i32 [ %a, %if.then ], [ %b, %entry ]
+ call void asm sideeffect "stepc $0", "h"(i32 %res)
+ ret void
+}
+
+; Check that inverting the condition works as well.
+define void @f7(i32 %limit) {
+; CHECK-LABEL: f7:
+; CHECK-DAG: stepa [[REG1:%r[0-5]]]
+; CHECK-DAG: stepb [[REG2:%r[0-5]]]
+; CHECK-DAG: clfi %r2, 41
+; CHECK: locfhrh [[REG2]], [[REG1]]
+; CHECK: stepc [[REG2]]
+; CHECK: br %r14
+entry:
+ %a = call i32 asm sideeffect "stepa $0", "=h"()
+ %b = call i32 asm sideeffect "stepb $0", "=h"()
+ %cond = icmp ult i32 %limit, 42
+ br i1 %cond, label %if.then, label %return
+
+if.then:
+ br label %return
+
+return:
+ %res = phi i32 [ %b, %if.then ], [ %a, %entry ]
+ call void asm sideeffect "stepc $0", "h"(i32 %res)
+ ret void
+}
+
+define void @f8(i32 %limit) {
+; CHECK-LABEL: f8:
+; CHECK: clfi %r2, 42
+; CHECK: lochhil [[REG:%r[0-5]]], 32767
+; CHECK: stepa [[REG]]
+; CHECK: br %r14
+ %cond = icmp ult i32 %limit, 42
+ %res = select i1 %cond, i32 32767, i32 0
+ call void asm sideeffect "stepa $0", "h"(i32 %res)
+ ret void
+}
+
+define void @f9(i32 %limit) {
+; CHECK-LABEL: f9:
+; CHECK: clfi %r2, 42
+; CHECK: lochhil [[REG:%r[0-5]]], -32768
+; CHECK: stepa [[REG]]
+; CHECK: br %r14
+ %cond = icmp ult i32 %limit, 42
+ %res = select i1 %cond, i32 -32768, i32 0
+ call void asm sideeffect "stepa $0", "h"(i32 %res)
+ ret void
+}
+
+; Check that we also get LOCHHI as a result of early if-conversion.
+define void @f10(i32 %limit) {
+; CHECK-LABEL: f10:
+; CHECK-DAG: stepa [[REG:%r[0-5]]]
+; CHECK-DAG: clfi %r2, 41
+; CHECK: lochhile [[REG]], 123
+; CHECK: stepb [[REG]]
+; CHECK: br %r14
+entry:
+ %a = call i32 asm sideeffect "stepa $0", "=h"()
+ %cond = icmp ult i32 %limit, 42
+ br i1 %cond, label %if.then, label %return
+
+if.then:
+ br label %return
+
+return:
+ %res = phi i32 [ 123, %if.then ], [ %a, %entry ]
+ call void asm sideeffect "stepb $0", "h"(i32 %res)
+ ret void
+}
+
+; Check that inverting the condition works as well.
+define void @f11(i32 %limit) {
+; CHECK-LABEL: f11:
+; CHECK-DAG: stepa [[REG:%r[0-5]]]
+; CHECK-DAG: clfi %r2, 41
+; CHECK: lochhih [[REG]], 123
+; CHECK: stepb [[REG]]
+; CHECK: br %r14
+entry:
+ %a = call i32 asm sideeffect "stepa $0", "=h"()
+ %cond = icmp ult i32 %limit, 42
+ br i1 %cond, label %if.then, label %return
+
+if.then:
+ br label %return
+
+return:
+ %res = phi i32 [ %a, %if.then ], [ 123, %entry ]
+ call void asm sideeffect "stepb $0", "h"(i32 %res)
+ ret void
+}
;
; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z196 | FileCheck %s
+; Run the test again to make sure it still works the same even
+; in the presence of the load-store-on-condition-2 facility.
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s
+
declare void @foo(i32 *)
; Test the simple case, with the loaded value first.
--- /dev/null
+; Test STOCFHs that are presented as selects.
+; See comments in asm-18.ll about testing high-word operations.
+;
+; RUN: llc < %s -verify-machineinstrs -mtriple=s390x-linux-gnu -mcpu=z13 \
+; RUN: -no-integrated-as | FileCheck %s
+
+declare void @foo(i32 *)
+
+; Test the simple case, with the loaded value first.
+define void @f1(i32 *%ptr, i32 %limit) {
+; CHECK-LABEL: f1:
+; CHECK-DAG: stepa [[REG:%r[0-5]]]
+; CHECK-DAG: clfi %r3, 42
+; CHECK: stocfhhe [[REG]], 0(%r2)
+; CHECK: br %r14
+ %alt = call i32 asm "stepa $0", "=h"()
+ %cond = icmp ult i32 %limit, 42
+ %orig = load i32, i32 *%ptr
+ %res = select i1 %cond, i32 %orig, i32 %alt
+ store i32 %res, i32 *%ptr
+ ret void
+}
+
+; ...and with the loaded value second.
+define void @f2(i32 *%ptr, i32 %limit) {
+; CHECK-LABEL: f2:
+; CHECK-DAG: stepa [[REG:%r[0-5]]]
+; CHECK-DAG: clfi %r3, 42
+; CHECK: stocfhl [[REG]], 0(%r2)
+; CHECK: br %r14
+ %alt = call i32 asm "stepa $0", "=h"()
+ %cond = icmp ult i32 %limit, 42
+ %orig = load i32, i32 *%ptr
+ %res = select i1 %cond, i32 %alt, i32 %orig
+ store i32 %res, i32 *%ptr
+ ret void
+}
+
+; Check the high end of the aligned STOC range.
+define void @f3(i32 *%base, i32 %limit) {
+; CHECK-LABEL: f3:
+; CHECK-DAG: stepa [[REG:%r[0-5]]]
+; CHECK-DAG: clfi %r3, 42
+; CHECK: stocfhhe [[REG]], 524284(%r2)
+; CHECK: br %r14
+ %alt = call i32 asm "stepa $0", "=h"()
+ %ptr = getelementptr i32, i32 *%base, i64 131071
+ %cond = icmp ult i32 %limit, 42
+ %orig = load i32, i32 *%ptr
+ %res = select i1 %cond, i32 %orig, i32 %alt
+ store i32 %res, i32 *%ptr
+ ret void
+}
+
+; Check the next word up. Other sequences besides this one would be OK.
+define void @f4(i32 *%base, i32 %limit) {
+; CHECK-LABEL: f4:
+; CHECK-DAG: stepa [[REG:%r[0-5]]]
+; CHECK-DAG: agfi %r2, 524288
+; CHECK-DAG: clfi %r3, 42
+; CHECK: stocfhhe [[REG]], 0(%r2)
+; CHECK: br %r14
+ %alt = call i32 asm "stepa $0", "=h"()
+ %ptr = getelementptr i32, i32 *%base, i64 131072
+ %cond = icmp ult i32 %limit, 42
+ %orig = load i32, i32 *%ptr
+ %res = select i1 %cond, i32 %orig, i32 %alt
+ store i32 %res, i32 *%ptr
+ ret void
+}
+
+; Check the low end of the STOC range.
+define void @f5(i32 *%base, i32 %limit) {
+; CHECK-LABEL: f5:
+; CHECK-DAG: stepa [[REG:%r[0-5]]]
+; CHECK-DAG: clfi %r3, 42
+; CHECK: stocfhhe [[REG]], -524288(%r2)
+; CHECK: br %r14
+ %alt = call i32 asm "stepa $0", "=h"()
+ %ptr = getelementptr i32, i32 *%base, i64 -131072
+ %cond = icmp ult i32 %limit, 42
+ %orig = load i32, i32 *%ptr
+ %res = select i1 %cond, i32 %orig, i32 %alt
+ store i32 %res, i32 *%ptr
+ ret void
+}
+
+; Check the next word down, with the same comments as f4.
+define void @f6(i32 *%base, i32 %limit) {
+; CHECK-LABEL: f6:
+; CHECK-DAG: stepa [[REG:%r[0-5]]]
+; CHECK-DAG: agfi %r2, -524292
+; CHECK-DAG: clfi %r3, 42
+; CHECK: stocfhhe [[REG]], 0(%r2)
+; CHECK: br %r14
+ %alt = call i32 asm "stepa $0", "=h"()
+ %ptr = getelementptr i32, i32 *%base, i64 -131073
+ %cond = icmp ult i32 %limit, 42
+ %orig = load i32, i32 *%ptr
+ %res = select i1 %cond, i32 %orig, i32 %alt
+ store i32 %res, i32 *%ptr
+ ret void
+}
+
+; Try a frame index base.
+define void @f7(i32 %limit) {
+; CHECK-LABEL: f7:
+; CHECK: brasl %r14, foo@PLT
+; CHECK: stepa [[REG:%r[0-5]]]
+; CHECK: stocfhhe [[REG]], {{[0-9]+}}(%r15)
+; CHECK: brasl %r14, foo@PLT
+; CHECK: br %r14
+ %ptr = alloca i32
+ call void @foo(i32 *%ptr)
+ %alt = call i32 asm "stepa $0", "=h"()
+ %cond = icmp ult i32 %limit, 42
+ %orig = load i32, i32 *%ptr
+ %res = select i1 %cond, i32 %orig, i32 %alt
+ store i32 %res, i32 *%ptr
+ call void @foo(i32 *%ptr)
+ ret void
+}
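+
+; The alloca's offset from %r15 is only fixed during frame lowering, so the
+; test matches the displacement with a regex rather than a constant.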
+
+; Test that conditionally-executed stores do not use STOCFH, since STOCFH is
+; allowed to trap even when the condition is false; a branch around a plain
+; STFH is used instead.
+define void @f8(i32 %a, i32 %b, i32 *%dest) {
+; CHECK-LABEL: f8:
+; CHECK-NOT: stoc
+; CHECK: stfh
+; CHECK: br %r14
+entry:
+ %val = call i32 asm "stepa $0", "=h"()
+ %cmp = icmp ule i32 %a, %b
+ br i1 %cmp, label %store, label %exit
+
+store:
+ store i32 %val, i32 *%dest
+ br label %exit
+
+exit:
+ ret void
+}
#CHECK: locghi %r11, 32512, 15
0xec 0xbf 0x7f 0x00 0x00 0x46
+
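+# The lochhi lines below walk the condition-code mask through all 16 values:
+# the mask is the low nibble of the second byte (the high nibble is %r11) and
+# maps to the mnemonic suffix as 1=o, 2=h, 3=nle, 4=l, 5=nhe, 6=lh, 7=ne,
+# 8=e, 9=nlh, 10=he, 11=nl, 12=le, 13=nh, 14=no; masks 0 and 15 have no
+# suffix and are printed with an explicit mask operand.
+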
+#CHECK: lochhi %r11, 42, 0
+0xec 0xb0 0x00 0x2a 0x00 0x4e
+
+#CHECK: lochhio %r11, 42
+0xec 0xb1 0x00 0x2a 0x00 0x4e
+
+#CHECK: lochhih %r11, 42
+0xec 0xb2 0x00 0x2a 0x00 0x4e
+
+#CHECK: lochhinle %r11, 42
+0xec 0xb3 0x00 0x2a 0x00 0x4e
+
+#CHECK: lochhil %r11, -1
+0xec 0xb4 0xff 0xff 0x00 0x4e
+
+#CHECK: lochhinhe %r11, 42
+0xec 0xb5 0x00 0x2a 0x00 0x4e
+
+#CHECK: lochhilh %r11, -1
+0xec 0xb6 0xff 0xff 0x00 0x4e
+
+#CHECK: lochhine %r11, 0
+0xec 0xb7 0x00 0x00 0x00 0x4e
+
+#CHECK: lochhie %r11, 0
+0xec 0xb8 0x00 0x00 0x00 0x4e
+
+#CHECK: lochhinlh %r11, 42
+0xec 0xb9 0x00 0x2a 0x00 0x4e
+
+#CHECK: lochhihe %r11, 255
+0xec 0xba 0x00 0xff 0x00 0x4e
+
+#CHECK: lochhinl %r11, 255
+0xec 0xbb 0x00 0xff 0x00 0x4e
+
+#CHECK: lochhile %r11, 32767
+0xec 0xbc 0x7f 0xff 0x00 0x4e
+
+#CHECK: lochhinh %r11, 32767
+0xec 0xbd 0x7f 0xff 0x00 0x4e
+
+#CHECK: lochhino %r11, 32512
+0xec 0xbe 0x7f 0x00 0x00 0x4e
+
+#CHECK: lochhi %r11, 32512, 15
+0xec 0xbf 0x7f 0x00 0x00 0x4e
+
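+# locfh uses a 20-bit signed displacement split across the encoding: the low
+# 12 bits sit next to the base-register nibble and the high 8 bits occupy the
+# fifth byte, so 0x88 0xff 0x01 decodes as base %r8 with displacement
+# 0x018ff = 6399.
+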
+# CHECK: locfh %r7, 6399(%r8), 0
+0xeb 0x70 0x88 0xff 0x01 0xe0
+
+# CHECK: locfho %r7, 6399(%r8)
+0xeb 0x71 0x88 0xff 0x01 0xe0
+
+# CHECK: locfhh %r7, 6399(%r8)
+0xeb 0x72 0x88 0xff 0x01 0xe0
+
+# CHECK: locfhnle %r7, 6399(%r8)
+0xeb 0x73 0x88 0xff 0x01 0xe0
+
+# CHECK: locfhl %r7, 6399(%r8)
+0xeb 0x74 0x88 0xff 0x01 0xe0
+
+# CHECK: locfhnhe %r7, 6399(%r8)
+0xeb 0x75 0x88 0xff 0x01 0xe0
+
+# CHECK: locfhlh %r7, 6399(%r8)
+0xeb 0x76 0x88 0xff 0x01 0xe0
+
+# CHECK: locfhne %r7, 6399(%r8)
+0xeb 0x77 0x88 0xff 0x01 0xe0
+
+# CHECK: locfhe %r7, 6399(%r8)
+0xeb 0x78 0x88 0xff 0x01 0xe0
+
+# CHECK: locfhnlh %r7, 6399(%r8)
+0xeb 0x79 0x88 0xff 0x01 0xe0
+
+# CHECK: locfhhe %r7, 6399(%r8)
+0xeb 0x7a 0x88 0xff 0x01 0xe0
+
+# CHECK: locfhnl %r7, 6399(%r8)
+0xeb 0x7b 0x88 0xff 0x01 0xe0
+
+# CHECK: locfhle %r7, 6399(%r8)
+0xeb 0x7c 0x88 0xff 0x01 0xe0
+
+# CHECK: locfhnh %r7, 6399(%r8)
+0xeb 0x7d 0x88 0xff 0x01 0xe0
+
+# CHECK: locfhno %r7, 6399(%r8)
+0xeb 0x7e 0x88 0xff 0x01 0xe0
+
+# CHECK: locfh %r7, 6399(%r8), 15
+0xeb 0x7f 0x88 0xff 0x01 0xe0
+
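+# locfhr is the register-to-register form (opcode 0xb9e0): the mask is the
+# high nibble of the third byte and R1/R2 share the last byte, so 0xf0 0xb3
+# is mask 15 applied to %r11, %r3.
+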
+# CHECK: locfhr %r11, %r3, 0
+0xb9 0xe0 0x00 0xb3
+
+# CHECK: locfhro %r11, %r3
+0xb9 0xe0 0x10 0xb3
+
+# CHECK: locfhrh %r11, %r3
+0xb9 0xe0 0x20 0xb3
+
+# CHECK: locfhrnle %r11, %r3
+0xb9 0xe0 0x30 0xb3
+
+# CHECK: locfhrl %r11, %r3
+0xb9 0xe0 0x40 0xb3
+
+# CHECK: locfhrnhe %r11, %r3
+0xb9 0xe0 0x50 0xb3
+
+# CHECK: locfhrlh %r11, %r3
+0xb9 0xe0 0x60 0xb3
+
+# CHECK: locfhrne %r11, %r3
+0xb9 0xe0 0x70 0xb3
+
+# CHECK: locfhre %r11, %r3
+0xb9 0xe0 0x80 0xb3
+
+# CHECK: locfhrnlh %r11, %r3
+0xb9 0xe0 0x90 0xb3
+
+# CHECK: locfhrhe %r11, %r3
+0xb9 0xe0 0xa0 0xb3
+
+# CHECK: locfhrnl %r11, %r3
+0xb9 0xe0 0xb0 0xb3
+
+# CHECK: locfhrle %r11, %r3
+0xb9 0xe0 0xc0 0xb3
+
+# CHECK: locfhrnh %r11, %r3
+0xb9 0xe0 0xd0 0xb3
+
+# CHECK: locfhrno %r11, %r3
+0xb9 0xe0 0xe0 0xb3
+
+# CHECK: locfhr %r11, %r3, 15
+0xb9 0xe0 0xf0 0xb3
+
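+# stocfh mirrors the locfh encodings with opcode suffix 0xe1 instead of 0xe0,
+# storing the high word conditionally rather than loading it.
+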
+# CHECK: stocfh %r1, 2(%r3), 0
+0xeb 0x10 0x30 0x02 0x00 0xe1
+
+# CHECK: stocfho %r1, 2(%r3)
+0xeb 0x11 0x30 0x02 0x00 0xe1
+
+# CHECK: stocfhh %r1, 2(%r3)
+0xeb 0x12 0x30 0x02 0x00 0xe1
+
+# CHECK: stocfhnle %r1, 2(%r3)
+0xeb 0x13 0x30 0x02 0x00 0xe1
+
+# CHECK: stocfhl %r1, 2(%r3)
+0xeb 0x14 0x30 0x02 0x00 0xe1
+
+# CHECK: stocfhnhe %r1, 2(%r3)
+0xeb 0x15 0x30 0x02 0x00 0xe1
+
+# CHECK: stocfhlh %r1, 2(%r3)
+0xeb 0x16 0x30 0x02 0x00 0xe1
+
+# CHECK: stocfhne %r1, 2(%r3)
+0xeb 0x17 0x30 0x02 0x00 0xe1
+
+# CHECK: stocfhe %r1, 2(%r3)
+0xeb 0x18 0x30 0x02 0x00 0xe1
+
+# CHECK: stocfhnlh %r1, 2(%r3)
+0xeb 0x19 0x30 0x02 0x00 0xe1
+
+# CHECK: stocfhhe %r1, 2(%r3)
+0xeb 0x1a 0x30 0x02 0x00 0xe1
+
+# CHECK: stocfhnl %r1, 2(%r3)
+0xeb 0x1b 0x30 0x02 0x00 0xe1
+
+# CHECK: stocfhle %r1, 2(%r3)
+0xeb 0x1c 0x30 0x02 0x00 0xe1
+
+# CHECK: stocfhnh %r1, 2(%r3)
+0xeb 0x1d 0x30 0x02 0x00 0xe1
+
+# CHECK: stocfhno %r1, 2(%r3)
+0xeb 0x1e 0x30 0x02 0x00 0xe1
+
+# CHECK: stocfh %r1, 2(%r3), 15
+0xeb 0x1f 0x30 0x02 0x00 0xe1
+
locghie %f0, 0
locghie 0, %r0
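+
+# lochhi takes a signed 16-bit immediate, so 66000 is out of range; the first
+# operand must be a general register.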
+#CHECK: error: invalid operand
+#CHECK: lochhie %r0, 66000
+#CHECK: error: invalid operand
+#CHECK: lochhie %f0, 0
+#CHECK: error: invalid operand
+#CHECK: lochhie 0, %r0
+
+ lochhie %r0, 66000
+ lochhie %f0, 0
+ lochhie 0, %r0
+
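+# locfh and stocfh (further down) take a 4-bit mask (0..15) and a signed
+# 20-bit displacement (-524288..524287), and their memory operand has no
+# index field, so base+index addressing is rejected.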
+#CHECK: error: invalid operand
+#CHECK: locfh %r0,0,-1
+#CHECK: error: invalid operand
+#CHECK: locfh %r0,0,16
+#CHECK: error: invalid operand
+#CHECK: locfh %r0,-524289,1
+#CHECK: error: invalid operand
+#CHECK: locfh %r0,524288,1
+#CHECK: error: invalid use of indexed addressing
+#CHECK: locfh %r0,0(%r1,%r2),1
+
+ locfh %r0,0,-1
+ locfh %r0,0,16
+ locfh %r0,-524289,1
+ locfh %r0,524288,1
+ locfh %r0,0(%r1,%r2),1
+
+#CHECK: error: invalid operand
+#CHECK: locfhr %r0,%r0,-1
+#CHECK: error: invalid operand
+#CHECK: locfhr %r0,%r0,16
+
+ locfhr %r0,%r0,-1
+ locfhr %r0,%r0,16
+
+#CHECK: error: invalid operand
+#CHECK: stocfh %r0,0,-1
+#CHECK: error: invalid operand
+#CHECK: stocfh %r0,0,16
+#CHECK: error: invalid operand
+#CHECK: stocfh %r0,-524289,1
+#CHECK: error: invalid operand
+#CHECK: stocfh %r0,524288,1
+#CHECK: error: invalid use of indexed addressing
+#CHECK: stocfh %r0,0(%r1,%r2),1
+
+ stocfh %r0,0,-1
+ stocfh %r0,0,16
+ stocfh %r0,-524289,1
+ stocfh %r0,524288,1
+ stocfh %r0,0(%r1,%r2),1
+
locghinh %r11, 32767
locghino %r11, 32512
locghi %r11, 32512, 15
+
+#CHECK: lochhi %r11, 42, 0 # encoding: [0xec,0xb0,0x00,0x2a,0x00,0x4e]
+#CHECK: lochhio %r11, 42 # encoding: [0xec,0xb1,0x00,0x2a,0x00,0x4e]
+#CHECK: lochhih %r11, 42 # encoding: [0xec,0xb2,0x00,0x2a,0x00,0x4e]
+#CHECK: lochhinle %r11, 42 # encoding: [0xec,0xb3,0x00,0x2a,0x00,0x4e]
+#CHECK: lochhil %r11, -1 # encoding: [0xec,0xb4,0xff,0xff,0x00,0x4e]
+#CHECK: lochhinhe %r11, 42 # encoding: [0xec,0xb5,0x00,0x2a,0x00,0x4e]
+#CHECK: lochhilh %r11, -1 # encoding: [0xec,0xb6,0xff,0xff,0x00,0x4e]
+#CHECK: lochhine %r11, 0 # encoding: [0xec,0xb7,0x00,0x00,0x00,0x4e]
+#CHECK: lochhie %r11, 0 # encoding: [0xec,0xb8,0x00,0x00,0x00,0x4e]
+#CHECK: lochhinlh %r11, 42 # encoding: [0xec,0xb9,0x00,0x2a,0x00,0x4e]
+#CHECK: lochhihe %r11, 255 # encoding: [0xec,0xba,0x00,0xff,0x00,0x4e]
+#CHECK: lochhinl %r11, 255 # encoding: [0xec,0xbb,0x00,0xff,0x00,0x4e]
+#CHECK: lochhile %r11, 32767 # encoding: [0xec,0xbc,0x7f,0xff,0x00,0x4e]
+#CHECK: lochhinh %r11, 32767 # encoding: [0xec,0xbd,0x7f,0xff,0x00,0x4e]
+#CHECK: lochhino %r11, 32512 # encoding: [0xec,0xbe,0x7f,0x00,0x00,0x4e]
+#CHECK: lochhi %r11, 32512, 15 # encoding: [0xec,0xbf,0x7f,0x00,0x00,0x4e]
+
+ lochhi %r11, 42, 0
+ lochhio %r11, 42
+ lochhih %r11, 42
+ lochhinle %r11, 42
+ lochhil %r11, -1
+ lochhinhe %r11, 42
+ lochhilh %r11, -1
+ lochhine %r11, 0
+ lochhie %r11, 0
+ lochhinlh %r11, 42
+ lochhihe %r11, 255
+ lochhinl %r11, 255
+ lochhile %r11, 32767
+ lochhinh %r11, 32767
+ lochhino %r11, 32512
+ lochhi %r11, 32512, 15
+
+#CHECK: locfh %r0, 0, 0 # encoding: [0xeb,0x00,0x00,0x00,0x00,0xe0]
+#CHECK: locfh %r0, 0, 15 # encoding: [0xeb,0x0f,0x00,0x00,0x00,0xe0]
+#CHECK: locfh %r0, -524288, 0 # encoding: [0xeb,0x00,0x00,0x00,0x80,0xe0]
+#CHECK: locfh %r0, 524287, 0 # encoding: [0xeb,0x00,0x0f,0xff,0x7f,0xe0]
+#CHECK: locfh %r0, 0(%r1), 0 # encoding: [0xeb,0x00,0x10,0x00,0x00,0xe0]
+#CHECK: locfh %r0, 0(%r15), 0 # encoding: [0xeb,0x00,0xf0,0x00,0x00,0xe0]
+#CHECK: locfh %r15, 0, 0 # encoding: [0xeb,0xf0,0x00,0x00,0x00,0xe0]
+#CHECK: locfh %r1, 4095(%r2), 3 # encoding: [0xeb,0x13,0x2f,0xff,0x00,0xe0]
+
+ locfh %r0,0,0
+ locfh %r0,0,15
+ locfh %r0,-524288,0
+ locfh %r0,524287,0
+ locfh %r0,0(%r1),0
+ locfh %r0,0(%r15),0
+ locfh %r15,0,0
+ locfh %r1,4095(%r2),3
+
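+# Each condition suffix also has an after-arithmetic alias that assembles to
+# the same mask: p = h, m = l, z = e, nz = ne, nm = nl, np = nh. The same
+# aliases exist for the locfhr and stocfh forms below.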
+#CHECK: locfho %r1, 2(%r3) # encoding: [0xeb,0x11,0x30,0x02,0x00,0xe0]
+#CHECK: locfhh %r1, 2(%r3) # encoding: [0xeb,0x12,0x30,0x02,0x00,0xe0]
+#CHECK: locfhp %r1, 2(%r3) # encoding: [0xeb,0x12,0x30,0x02,0x00,0xe0]
+#CHECK: locfhnle %r1, 2(%r3) # encoding: [0xeb,0x13,0x30,0x02,0x00,0xe0]
+#CHECK: locfhl %r1, 2(%r3) # encoding: [0xeb,0x14,0x30,0x02,0x00,0xe0]
+#CHECK: locfhm %r1, 2(%r3) # encoding: [0xeb,0x14,0x30,0x02,0x00,0xe0]
+#CHECK: locfhnhe %r1, 2(%r3) # encoding: [0xeb,0x15,0x30,0x02,0x00,0xe0]
+#CHECK: locfhlh %r1, 2(%r3) # encoding: [0xeb,0x16,0x30,0x02,0x00,0xe0]
+#CHECK: locfhne %r1, 2(%r3) # encoding: [0xeb,0x17,0x30,0x02,0x00,0xe0]
+#CHECK: locfhnz %r1, 2(%r3) # encoding: [0xeb,0x17,0x30,0x02,0x00,0xe0]
+#CHECK: locfhe %r1, 2(%r3) # encoding: [0xeb,0x18,0x30,0x02,0x00,0xe0]
+#CHECK: locfhz %r1, 2(%r3) # encoding: [0xeb,0x18,0x30,0x02,0x00,0xe0]
+#CHECK: locfhnlh %r1, 2(%r3) # encoding: [0xeb,0x19,0x30,0x02,0x00,0xe0]
+#CHECK: locfhhe %r1, 2(%r3) # encoding: [0xeb,0x1a,0x30,0x02,0x00,0xe0]
+#CHECK: locfhnl %r1, 2(%r3) # encoding: [0xeb,0x1b,0x30,0x02,0x00,0xe0]
+#CHECK: locfhnm %r1, 2(%r3) # encoding: [0xeb,0x1b,0x30,0x02,0x00,0xe0]
+#CHECK: locfhle %r1, 2(%r3) # encoding: [0xeb,0x1c,0x30,0x02,0x00,0xe0]
+#CHECK: locfhnh %r1, 2(%r3) # encoding: [0xeb,0x1d,0x30,0x02,0x00,0xe0]
+#CHECK: locfhnp %r1, 2(%r3) # encoding: [0xeb,0x1d,0x30,0x02,0x00,0xe0]
+#CHECK: locfhno %r1, 2(%r3) # encoding: [0xeb,0x1e,0x30,0x02,0x00,0xe0]
+
+ locfho %r1,2(%r3)
+ locfhh %r1,2(%r3)
+ locfhp %r1,2(%r3)
+ locfhnle %r1,2(%r3)
+ locfhl %r1,2(%r3)
+ locfhm %r1,2(%r3)
+ locfhnhe %r1,2(%r3)
+ locfhlh %r1,2(%r3)
+ locfhne %r1,2(%r3)
+ locfhnz %r1,2(%r3)
+ locfhe %r1,2(%r3)
+ locfhz %r1,2(%r3)
+ locfhnlh %r1,2(%r3)
+ locfhhe %r1,2(%r3)
+ locfhnl %r1,2(%r3)
+ locfhnm %r1,2(%r3)
+ locfhle %r1,2(%r3)
+ locfhnh %r1,2(%r3)
+ locfhnp %r1,2(%r3)
+ locfhno %r1,2(%r3)
+
+#CHECK: locfhr %r1, %r2, 0 # encoding: [0xb9,0xe0,0x00,0x12]
+#CHECK: locfhr %r1, %r2, 15 # encoding: [0xb9,0xe0,0xf0,0x12]
+
+ locfhr %r1,%r2,0
+ locfhr %r1,%r2,15
+
+#CHECK: locfhro %r1, %r3 # encoding: [0xb9,0xe0,0x10,0x13]
+#CHECK: locfhrh %r1, %r3 # encoding: [0xb9,0xe0,0x20,0x13]
+#CHECK: locfhrp %r1, %r3 # encoding: [0xb9,0xe0,0x20,0x13]
+#CHECK: locfhrnle %r1, %r3 # encoding: [0xb9,0xe0,0x30,0x13]
+#CHECK: locfhrl %r1, %r3 # encoding: [0xb9,0xe0,0x40,0x13]
+#CHECK: locfhrm %r1, %r3 # encoding: [0xb9,0xe0,0x40,0x13]
+#CHECK: locfhrnhe %r1, %r3 # encoding: [0xb9,0xe0,0x50,0x13]
+#CHECK: locfhrlh %r1, %r3 # encoding: [0xb9,0xe0,0x60,0x13]
+#CHECK: locfhrne %r1, %r3 # encoding: [0xb9,0xe0,0x70,0x13]
+#CHECK: locfhrnz %r1, %r3 # encoding: [0xb9,0xe0,0x70,0x13]
+#CHECK: locfhre %r1, %r3 # encoding: [0xb9,0xe0,0x80,0x13]
+#CHECK: locfhrz %r1, %r3 # encoding: [0xb9,0xe0,0x80,0x13]
+#CHECK: locfhrnlh %r1, %r3 # encoding: [0xb9,0xe0,0x90,0x13]
+#CHECK: locfhrhe %r1, %r3 # encoding: [0xb9,0xe0,0xa0,0x13]
+#CHECK: locfhrnl %r1, %r3 # encoding: [0xb9,0xe0,0xb0,0x13]
+#CHECK: locfhrnm %r1, %r3 # encoding: [0xb9,0xe0,0xb0,0x13]
+#CHECK: locfhrle %r1, %r3 # encoding: [0xb9,0xe0,0xc0,0x13]
+#CHECK: locfhrnh %r1, %r3 # encoding: [0xb9,0xe0,0xd0,0x13]
+#CHECK: locfhrnp %r1, %r3 # encoding: [0xb9,0xe0,0xd0,0x13]
+#CHECK: locfhrno %r1, %r3 # encoding: [0xb9,0xe0,0xe0,0x13]
+
+ locfhro %r1,%r3
+ locfhrh %r1,%r3
+ locfhrp %r1,%r3
+ locfhrnle %r1,%r3
+ locfhrl %r1,%r3
+ locfhrm %r1,%r3
+ locfhrnhe %r1,%r3
+ locfhrlh %r1,%r3
+ locfhrne %r1,%r3
+ locfhrnz %r1,%r3
+ locfhre %r1,%r3
+ locfhrz %r1,%r3
+ locfhrnlh %r1,%r3
+ locfhrhe %r1,%r3
+ locfhrnl %r1,%r3
+ locfhrnm %r1,%r3
+ locfhrle %r1,%r3
+ locfhrnh %r1,%r3
+ locfhrnp %r1,%r3
+ locfhrno %r1,%r3
+
+#CHECK: stocfh %r0, 0, 0 # encoding: [0xeb,0x00,0x00,0x00,0x00,0xe1]
+#CHECK: stocfh %r0, 0, 15 # encoding: [0xeb,0x0f,0x00,0x00,0x00,0xe1]
+#CHECK: stocfh %r0, -524288, 0 # encoding: [0xeb,0x00,0x00,0x00,0x80,0xe1]
+#CHECK: stocfh %r0, 524287, 0 # encoding: [0xeb,0x00,0x0f,0xff,0x7f,0xe1]
+#CHECK: stocfh %r0, 0(%r1), 0 # encoding: [0xeb,0x00,0x10,0x00,0x00,0xe1]
+#CHECK: stocfh %r0, 0(%r15), 0 # encoding: [0xeb,0x00,0xf0,0x00,0x00,0xe1]
+#CHECK: stocfh %r15, 0, 0 # encoding: [0xeb,0xf0,0x00,0x00,0x00,0xe1]
+#CHECK: stocfh %r1, 4095(%r2), 3 # encoding: [0xeb,0x13,0x2f,0xff,0x00,0xe1]
+
+ stocfh %r0,0,0
+ stocfh %r0,0,15
+ stocfh %r0,-524288,0
+ stocfh %r0,524287,0
+ stocfh %r0,0(%r1),0
+ stocfh %r0,0(%r15),0
+ stocfh %r15,0,0
+ stocfh %r1,4095(%r2),3
+
+#CHECK: stocfho %r1, 2(%r3) # encoding: [0xeb,0x11,0x30,0x02,0x00,0xe1]
+#CHECK: stocfhh %r1, 2(%r3) # encoding: [0xeb,0x12,0x30,0x02,0x00,0xe1]
+#CHECK: stocfhp %r1, 2(%r3) # encoding: [0xeb,0x12,0x30,0x02,0x00,0xe1]
+#CHECK: stocfhnle %r1, 2(%r3) # encoding: [0xeb,0x13,0x30,0x02,0x00,0xe1]
+#CHECK: stocfhl %r1, 2(%r3) # encoding: [0xeb,0x14,0x30,0x02,0x00,0xe1]
+#CHECK: stocfhm %r1, 2(%r3) # encoding: [0xeb,0x14,0x30,0x02,0x00,0xe1]
+#CHECK: stocfhnhe %r1, 2(%r3) # encoding: [0xeb,0x15,0x30,0x02,0x00,0xe1]
+#CHECK: stocfhlh %r1, 2(%r3) # encoding: [0xeb,0x16,0x30,0x02,0x00,0xe1]
+#CHECK: stocfhne %r1, 2(%r3) # encoding: [0xeb,0x17,0x30,0x02,0x00,0xe1]
+#CHECK: stocfhnz %r1, 2(%r3) # encoding: [0xeb,0x17,0x30,0x02,0x00,0xe1]
+#CHECK: stocfhe %r1, 2(%r3) # encoding: [0xeb,0x18,0x30,0x02,0x00,0xe1]
+#CHECK: stocfhz %r1, 2(%r3) # encoding: [0xeb,0x18,0x30,0x02,0x00,0xe1]
+#CHECK: stocfhnlh %r1, 2(%r3) # encoding: [0xeb,0x19,0x30,0x02,0x00,0xe1]
+#CHECK: stocfhhe %r1, 2(%r3) # encoding: [0xeb,0x1a,0x30,0x02,0x00,0xe1]
+#CHECK: stocfhnl %r1, 2(%r3) # encoding: [0xeb,0x1b,0x30,0x02,0x00,0xe1]
+#CHECK: stocfhnm %r1, 2(%r3) # encoding: [0xeb,0x1b,0x30,0x02,0x00,0xe1]
+#CHECK: stocfhle %r1, 2(%r3) # encoding: [0xeb,0x1c,0x30,0x02,0x00,0xe1]
+#CHECK: stocfhnh %r1, 2(%r3) # encoding: [0xeb,0x1d,0x30,0x02,0x00,0xe1]
+#CHECK: stocfhnp %r1, 2(%r3) # encoding: [0xeb,0x1d,0x30,0x02,0x00,0xe1]
+#CHECK: stocfhno %r1, 2(%r3) # encoding: [0xeb,0x1e,0x30,0x02,0x00,0xe1]
+
+ stocfho %r1,2(%r3)
+ stocfhh %r1,2(%r3)
+ stocfhp %r1,2(%r3)
+ stocfhnle %r1,2(%r3)
+ stocfhl %r1,2(%r3)
+ stocfhm %r1,2(%r3)
+ stocfhnhe %r1,2(%r3)
+ stocfhlh %r1,2(%r3)
+ stocfhne %r1,2(%r3)
+ stocfhnz %r1,2(%r3)
+ stocfhe %r1,2(%r3)
+ stocfhz %r1,2(%r3)
+ stocfhnlh %r1,2(%r3)
+ stocfhhe %r1,2(%r3)
+ stocfhnl %r1,2(%r3)
+ stocfhnm %r1,2(%r3)
+ stocfhle %r1,2(%r3)
+ stocfhnh %r1,2(%r3)
+ stocfhnp %r1,2(%r3)
+ stocfhno %r1,2(%r3)
+