if (!MO.isReg() || !MO.isDef() || MO.isDead())
continue;
unsigned Reg = MO.getReg();
- if (!Reg)
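+ // Skip virtual registers; only physical registers and their aliases are
+ // tracked in LocalDefsSet.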
+ if (!Reg || TargetRegisterInfo::isVirtualRegister(Reg))
continue;
LocalDefs.push_back(Reg);
addRegAndItsAliases(Reg, TRI, LocalDefsSet);
WebAssemblyLowerBrUnless.cpp
WebAssemblyMachineFunctionInfo.cpp
WebAssemblyMCInstLower.cpp
+ WebAssemblyOptimizeLiveIntervals.cpp
WebAssemblyOptimizeReturned.cpp
WebAssemblyPeephole.cpp
WebAssemblyPEI.cpp
+ WebAssemblyPrepareForLiveIntervals.cpp
WebAssemblyRegisterInfo.cpp
WebAssemblyRegColoring.cpp
WebAssemblyRegNumbering.cpp
WebAssemblyRegStackify.cpp
+ WebAssemblyReplacePhysRegs.cpp
WebAssemblySelectionDAGInfo.cpp
WebAssemblySetP2AlignOperands.cpp
WebAssemblyStoreResults.cpp
class WebAssemblyTargetMachine;
class FunctionPass;
+// LLVM IR passes.
FunctionPass *createWebAssemblyOptimizeReturned();
+// ISel and immediate followup passes.
FunctionPass *createWebAssemblyISelDag(WebAssemblyTargetMachine &TM,
CodeGenOpt::Level OptLevel);
FunctionPass *createWebAssemblyArgumentMove();
FunctionPass *createWebAssemblySetP2AlignOperands();
+// Regalloc-time passes.
+FunctionPass *createWebAssemblyPEI();
+
+// Late passes.
+FunctionPass *createWebAssemblyReplacePhysRegs();
+FunctionPass *createWebAssemblyPrepareForLiveIntervals();
+FunctionPass *createWebAssemblyOptimizeLiveIntervals();
FunctionPass *createWebAssemblyStoreResults();
FunctionPass *createWebAssemblyRegStackify();
FunctionPass *createWebAssemblyRegColoring();
-FunctionPass *createWebAssemblyPEI();
FunctionPass *createWebAssemblyFixIrreducibleControlFlow();
FunctionPass *createWebAssemblyCFGStackify();
FunctionPass *createWebAssemblyLowerBrUnless();
//===----------------------------------------------------------------------===//
MVT WebAssemblyAsmPrinter::getRegType(unsigned RegNo) const {
- const TargetRegisterClass *TRC =
- TargetRegisterInfo::isVirtualRegister(RegNo)
- ? MRI->getRegClass(RegNo)
- : MRI->getTargetRegisterInfo()->getMinimalPhysRegClass(RegNo);
+ const TargetRegisterClass *TRC = MRI->getRegClass(RegNo);
for (MVT T : {MVT::i32, MVT::i64, MVT::f32, MVT::f64})
if (TRC->hasType(T))
return T;
LocalTypes.push_back(getRegType(VReg));
AnyWARegs = true;
}
- auto &PhysRegs = MFI->getPhysRegs();
- for (unsigned PReg = 0; PReg < PhysRegs.size(); ++PReg) {
- if (PhysRegs[PReg] == -1U)
- continue;
- LocalTypes.push_back(getRegType(PReg));
- AnyWARegs = true;
- }
if (AnyWARegs)
getTargetStreamer()->emitLocal(LocalTypes);
MachineBasicBlock::iterator &InsertStore,
DebugLoc DL) {
auto *SPSymbol = MF.createExternalSymbolName("__stack_pointer");
- unsigned SPAddr =
- MF.getRegInfo().createVirtualRegister(&WebAssembly::I32RegClass);
+ MachineRegisterInfo &MRI = MF.getRegInfo();
+ const TargetRegisterClass *PtrRC =
+ MRI.getTargetRegisterInfo()->getPointerRegClass(MF);
+ unsigned SPAddr = MRI.createVirtualRegister(PtrRC);
+ unsigned Discard = MRI.createVirtualRegister(PtrRC);
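+ // WebAssembly store instructions return their value operand as a result;
+ // the result is unused here, so direct it to a scratch register.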
const auto *TII = MF.getSubtarget<WebAssemblySubtarget>().getInstrInfo();
BuildMI(MBB, InsertAddr, DL, TII->get(WebAssembly::CONST_I32), SPAddr)
auto *MMO = new MachineMemOperand(MachinePointerInfo(),
MachineMemOperand::MOStore, 4, 4);
BuildMI(MBB, InsertStore, DL, TII->get(WebAssembly::STORE_I32),
- SrcReg)
+ Discard)
.addImm(0)
.addReg(SPAddr)
.addImm(2) // p2align
.addReg(SrcReg)
.addMemOperand(MMO);
- MF.getInfo<WebAssemblyFunctionInfo>()->stackifyVReg(SPAddr);
}
MachineBasicBlock::iterator
auto *MFI = MF.getFrameInfo();
assert(MFI->getCalleeSavedInfo().empty() &&
"WebAssembly should not have callee-saved registers");
- auto *WFI = MF.getInfo<WebAssemblyFunctionInfo>();
if (!needsSP(MF, *MFI)) return;
uint64_t StackSize = MFI->getStackSize();
auto InsertPt = MBB.begin();
DebugLoc DL;
- unsigned SPAddr = MRI.createVirtualRegister(&WebAssembly::I32RegClass);
- unsigned SPReg = MRI.createVirtualRegister(&WebAssembly::I32RegClass);
+ const TargetRegisterClass *PtrRC =
+ MRI.getTargetRegisterInfo()->getPointerRegClass(MF);
+ unsigned SPAddr = MRI.createVirtualRegister(PtrRC);
+ unsigned SPReg = MRI.createVirtualRegister(PtrRC);
auto *SPSymbol = MF.createExternalSymbolName("__stack_pointer");
BuildMI(MBB, InsertPt, DL, TII->get(WebAssembly::CONST_I32), SPAddr)
.addExternalSymbol(SPSymbol);
.addReg(SPAddr) // addr
.addImm(2) // p2align
.addMemOperand(LoadMMO);
- WFI->stackifyVReg(SPAddr);
if (StackSize) {
// Subtract the frame size
- unsigned OffsetReg = MRI.createVirtualRegister(&WebAssembly::I32RegClass);
+ unsigned OffsetReg = MRI.createVirtualRegister(PtrRC);
BuildMI(MBB, InsertPt, DL, TII->get(WebAssembly::CONST_I32), OffsetReg)
.addImm(StackSize);
BuildMI(MBB, InsertPt, DL, TII->get(WebAssembly::SUB_I32),
WebAssembly::SP32)
.addReg(SPReg)
.addReg(OffsetReg);
- WFI->stackifyVReg(OffsetReg);
- WFI->stackifyVReg(SPReg);
}
if (hasFP(MF)) {
// Unlike most conventional targets (where FP points to the saved FP),
// FP points to the bottom of the fixed-size locals, so we can use positive
// offsets in load/store instructions.
- BuildMI(MBB, InsertPt, DL, TII->get(WebAssembly::COPY_LOCAL_I32),
+ BuildMI(MBB, InsertPt, DL, TII->get(WebAssembly::COPY),
WebAssembly::FP32)
.addReg(WebAssembly::SP32);
}
auto *MFI = MF.getFrameInfo();
uint64_t StackSize = MFI->getStackSize();
if (!needsSP(MF, *MFI) || !needsSPWriteback(MF, *MFI)) return;
- auto *WFI = MF.getInfo<WebAssemblyFunctionInfo>();
const auto *TII = MF.getSubtarget<WebAssemblySubtarget>().getInstrInfo();
auto &MRI = MF.getRegInfo();
auto InsertPt = MBB.getFirstTerminator();
DebugLoc DL;
- if (InsertPt != MBB.end()) {
+ if (InsertPt != MBB.end())
DL = InsertPt->getDebugLoc();
- // If code has been stackified with the return, disconnect it so that we
- // don't break the tree when we insert code just before the return.
- if (InsertPt->isReturn() && InsertPt->getNumExplicitOperands() != 0) {
- WebAssemblyFunctionInfo &MFI = *MF.getInfo<WebAssemblyFunctionInfo>();
- MFI.unstackifyVReg(InsertPt->getOperand(0).getReg());
- }
- }
-
// Restore the stack pointer. If we had fixed-size locals, add the offset
// subtracted in the prolog.
unsigned SPReg = 0;
MachineBasicBlock::iterator InsertAddr = InsertPt;
if (StackSize) {
- unsigned OffsetReg = MRI.createVirtualRegister(&WebAssembly::I32RegClass);
+ const TargetRegisterClass *PtrRC =
+ MRI.getTargetRegisterInfo()->getPointerRegClass(MF);
+ unsigned OffsetReg = MRI.createVirtualRegister(PtrRC);
InsertAddr =
BuildMI(MBB, InsertPt, DL, TII->get(WebAssembly::CONST_I32), OffsetReg)
.addImm(StackSize);
// In the epilog we don't need to write the result back to the SP32 physreg
// because it won't be used again. We can use a stackified register instead.
- SPReg = MRI.createVirtualRegister(&WebAssembly::I32RegClass);
+ SPReg = MRI.createVirtualRegister(PtrRC);
BuildMI(MBB, InsertPt, DL, TII->get(WebAssembly::ADD_I32), SPReg)
.addReg(hasFP(MF) ? WebAssembly::FP32 : WebAssembly::SP32)
.addReg(OffsetReg);
- WFI->stackifyVReg(OffsetReg);
- WFI->stackifyVReg(SPReg);
} else {
SPReg = hasFP(MF) ? WebAssembly::FP32 : WebAssembly::SP32;
}
/// - defined and used in LIFO order with other stack registers
BitVector VRegStackified;
- // One entry for each possible target reg. we expect it to be small.
- std::vector<unsigned> PhysRegs;
-
// A virtual register holding the pointer to the vararg buffer for vararg
// functions. It is created and set in TLI::LowerFormalArguments and read by
// TLI::LowerVASTART
unsigned VarargVreg = -1U;
public:
- explicit WebAssemblyFunctionInfo(MachineFunction &MF) : MF(MF) {
- PhysRegs.resize(WebAssembly::NUM_TARGET_REGS, -1U);
- }
+ explicit WebAssemblyFunctionInfo(MachineFunction &MF) : MF(MF) {}
~WebAssemblyFunctionInfo() override;
void addParam(MVT VT) { Params.push_back(VT); }
VRegStackified.resize(TargetRegisterInfo::virtReg2Index(VReg) + 1);
VRegStackified.set(TargetRegisterInfo::virtReg2Index(VReg));
}
- void unstackifyVReg(unsigned VReg) {
- if (TargetRegisterInfo::virtReg2Index(VReg) >= VRegStackified.size())
- return;
- VRegStackified.reset(TargetRegisterInfo::virtReg2Index(VReg));
- }
bool isVRegStackified(unsigned VReg) const {
if (TargetRegisterInfo::virtReg2Index(VReg) >= VRegStackified.size())
return false;
WARegs[TargetRegisterInfo::virtReg2Index(VReg)] = WAReg;
}
unsigned getWAReg(unsigned Reg) const {
- if (TargetRegisterInfo::isVirtualRegister(Reg)) {
- assert(TargetRegisterInfo::virtReg2Index(Reg) < WARegs.size());
- return WARegs[TargetRegisterInfo::virtReg2Index(Reg)];
- }
- return PhysRegs[Reg];
+ assert(TargetRegisterInfo::virtReg2Index(Reg) < WARegs.size());
+ return WARegs[TargetRegisterInfo::virtReg2Index(Reg)];
}
// If new virtual registers are created after initWARegs has been called,
// this function can be used to add WebAssembly register mappings for them.
assert(TargetRegisterInfo::virtReg2Index(VReg) == WARegs.size());
WARegs.push_back(WAReg);
}
-
- void addPReg(unsigned PReg, unsigned WAReg) {
- assert(PReg < WebAssembly::NUM_TARGET_REGS);
- assert(WAReg < -1U);
- PhysRegs[PReg] = WAReg;
- }
- const std::vector<unsigned> &getPhysRegs() const { return PhysRegs; }
};
} // end namespace llvm
--- /dev/null
+//===--- WebAssemblyOptimizeLiveIntervals.cpp - LiveInterval processing ---===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// \brief Optimize LiveIntervals for use in a post-RA context.
+///
+/// LiveIntervals normally runs before register allocation when the code is
+/// only recently lowered out of SSA form, so it's uncommon for registers to
+/// have multiple defs, and when they do, the defs are usually closely related.
+/// Later, after coalescing, tail duplication, and other optimizations, it's
+/// more common to see registers with multiple unrelated defs. This pass
+/// updates LiveIntervalAnalysis to distribute the value numbers across separate
+/// LiveIntervals.
+///
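+/// For example (illustrative; the register name is hypothetical):
+///
+///   %v = CONST_I32 0
+///   ... use of %v ...
+///   %v = CONST_I32 1
+///   ... use of %v ...
+///
+/// holds two unrelated values in one interval; splitting gives each value
+/// its own virtual register, so later passes can treat them independently.
+///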
+//===----------------------------------------------------------------------===//
+
+#include "WebAssembly.h"
+#include "WebAssemblySubtarget.h"
+#include "llvm/CodeGen/LiveIntervalAnalysis.h"
+#include "llvm/CodeGen/MachineBlockFrequencyInfo.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+using namespace llvm;
+
+#define DEBUG_TYPE "wasm-optimize-live-intervals"
+
+namespace {
+class WebAssemblyOptimizeLiveIntervals final : public MachineFunctionPass {
+ const char *getPassName() const override {
+ return "WebAssembly Optimize Live Intervals";
+ }
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.setPreservesCFG();
+ AU.addRequired<LiveIntervals>();
+ AU.addPreserved<MachineBlockFrequencyInfo>();
+ AU.addPreserved<SlotIndexes>();
+ AU.addPreserved<LiveIntervals>();
+ AU.addPreservedID(LiveVariablesID);
+ AU.addPreservedID(MachineDominatorsID);
+ MachineFunctionPass::getAnalysisUsage(AU);
+ }
+
+ bool runOnMachineFunction(MachineFunction &MF) override;
+
+public:
+ static char ID; // Pass identification, replacement for typeid
+ WebAssemblyOptimizeLiveIntervals() : MachineFunctionPass(ID) {}
+};
+} // end anonymous namespace
+
+char WebAssemblyOptimizeLiveIntervals::ID = 0;
+FunctionPass *llvm::createWebAssemblyOptimizeLiveIntervals() {
+ return new WebAssemblyOptimizeLiveIntervals();
+}
+
+bool WebAssemblyOptimizeLiveIntervals::runOnMachineFunction(
+    MachineFunction &MF) {
+ DEBUG(dbgs() << "********** Optimize LiveIntervals **********\n"
+ "********** Function: "
+ << MF.getName() << '\n');
+
+ MachineRegisterInfo &MRI = MF.getRegInfo();
+ LiveIntervals &LIS = getAnalysis<LiveIntervals>();
+
+ // We don't preserve SSA form.
+ MRI.leaveSSA();
+
+ assert(MRI.tracksLiveness() &&
+ "OptimizeLiveIntervals expects liveness");
+
+ // Split multiple-VN LiveIntervals into multiple LiveIntervals.
+ SmallVector<LiveInterval*, 4> SplitLIs;
+ for (unsigned i = 0, e = MRI.getNumVirtRegs(); i < e; ++i) {
+ unsigned Reg = TargetRegisterInfo::index2VirtReg(i);
+ if (MRI.reg_nodbg_empty(Reg))
+ continue;
+
+ LIS.splitSeparateComponents(LIS.getInterval(Reg), SplitLIs);
+ SplitLIs.clear();
+ }
+
+ // In PrepareForLiveIntervals, we conservatively inserted IMPLICIT_DEF
+ // instructions to satisfy LiveIntervals' requirement that all uses be
+ // dominated by defs. Now that LiveIntervals has computed which of these
+ // defs are actually needed and which are dead, remove the dead ones.
+ for (auto MII = MF.begin()->begin(), MIE = MF.begin()->end(); MII != MIE; ) {
+ MachineInstr *MI = &*MII++;
+ if (MI->isImplicitDef() && MI->getOperand(0).isDead()) {
+ LiveInterval &LI = LIS.getInterval(MI->getOperand(0).getReg());
+ LIS.removeVRegDefAt(LI, LIS.getInstructionIndex(*MI).getRegSlot());
+ LIS.RemoveMachineInstrFromMaps(*MI);
+ MI->eraseFromParent();
+ }
+ }
+
+ return false;
+}
#include "llvm/CodeGen/Passes.h"
#include "llvm/CodeGen/RegisterScavenging.h"
#include "llvm/CodeGen/StackProtector.h"
+#include "llvm/CodeGen/TargetPassConfig.h"
#include "llvm/CodeGen/WinEHFuncInfo.h"
#include "llvm/IR/DiagnosticInfo.h"
#include "llvm/IR/InlineAsm.h"
--- /dev/null
+//===- WebAssemblyPrepareForLiveIntervals.cpp - Prepare for LiveIntervals -===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// \brief Fix up code to meet LiveIntervals' requirements.
+///
+/// Some CodeGen passes don't preserve LiveIntervals' requirements, because
+/// they run after register allocation, where those requirements normally no
+/// longer matter. However, WebAssembly runs LiveIntervals in a late pass.
+/// This pass transforms code to meet LiveIntervals' requirements; primarily,
+/// it ensures that all virtual register uses have definitions (IMPLICIT_DEF
+/// definitions if nothing else).
+///
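+/// For example (illustrative): if a use of a virtual register %v is
+/// reachable along a path that carries no def of %v (e.g. after tail
+/// merging), inserting
+///
+///   %v = IMPLICIT_DEF
+///
+/// into the entry block conservatively gives every use a dominating def.
+///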
+//===----------------------------------------------------------------------===//
+
+#include "WebAssembly.h"
+#include "MCTargetDesc/WebAssemblyMCTargetDesc.h"
+#include "WebAssemblyMachineFunctionInfo.h"
+#include "WebAssemblySubtarget.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+using namespace llvm;
+
+#define DEBUG_TYPE "wasm-prepare-for-live-intervals"
+
+namespace {
+class WebAssemblyPrepareForLiveIntervals final : public MachineFunctionPass {
+public:
+ static char ID; // Pass identification, replacement for typeid
+ WebAssemblyPrepareForLiveIntervals() : MachineFunctionPass(ID) {}
+
+private:
+ const char *getPassName() const override {
+ return "WebAssembly Prepare For LiveIntervals";
+ }
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.setPreservesCFG();
+ MachineFunctionPass::getAnalysisUsage(AU);
+ }
+
+ bool runOnMachineFunction(MachineFunction &MF) override;
+};
+} // end anonymous namespace
+
+char WebAssemblyPrepareForLiveIntervals::ID = 0;
+FunctionPass *llvm::createWebAssemblyPrepareForLiveIntervals() {
+ return new WebAssemblyPrepareForLiveIntervals();
+}
+
+/// Test whether the given instruction is an ARGUMENT.
+static bool IsArgument(const MachineInstr *MI) {
+ switch (MI->getOpcode()) {
+ case WebAssembly::ARGUMENT_I32:
+ case WebAssembly::ARGUMENT_I64:
+ case WebAssembly::ARGUMENT_F32:
+ case WebAssembly::ARGUMENT_F64:
+ return true;
+ default:
+ return false;
+ }
+}
+
+// Test whether the given register has an ARGUMENT def.
+static bool HasArgumentDef(unsigned Reg, const MachineRegisterInfo &MRI) {
+ for (auto &Def : MRI.def_instructions(Reg))
+ if (IsArgument(&Def))
+ return true;
+ return false;
+}
+
+bool WebAssemblyPrepareForLiveIntervals::runOnMachineFunction(
+    MachineFunction &MF) {
+ DEBUG({
+ dbgs() << "********** Prepare For LiveIntervals **********\n"
+ << "********** Function: " << MF.getName() << '\n';
+ });
+
+ bool Changed = false;
+ MachineRegisterInfo &MRI = MF.getRegInfo();
+ const auto &TII = *MF.getSubtarget<WebAssemblySubtarget>().getInstrInfo();
+ MachineBasicBlock &Entry = *MF.begin();
+
+ assert(!mustPreserveAnalysisID(LiveIntervalsID) &&
+ "LiveIntervals shouldn't be active yet!");
+
+ // We don't preserve SSA form.
+ MRI.leaveSSA();
+
+ // BranchFolding and perhaps other passes don't preserve IMPLICIT_DEF
+ // instructions. LiveIntervals requires that all paths to virtual register
+ // uses provide a definition. Insert IMPLICIT_DEFs in the entry block to
+ // conservatively satisfy this.
+ //
+ // TODO: This is fairly heavy-handed; find a better approach.
+ //
+ for (unsigned i = 0, e = MRI.getNumVirtRegs(); i < e; ++i) {
+ unsigned Reg = TargetRegisterInfo::index2VirtReg(i);
+
+ // Skip unused registers.
+ if (MRI.use_nodbg_empty(Reg))
+ continue;
+
+ // Skip registers that have an ARGUMENT definition.
+ if (HasArgumentDef(Reg, MRI))
+ continue;
+
+ BuildMI(Entry, Entry.begin(), DebugLoc(),
+ TII.get(WebAssembly::IMPLICIT_DEF), Reg);
+ Changed = true;
+ }
+
+ // Move ARGUMENT_* instructions to the top of the entry block, so that their
+ // liveness reflects the fact that these really are live-in values.
+ for (auto MII = Entry.begin(), MIE = Entry.end(); MII != MIE; ) {
+ MachineInstr *MI = &*MII++;
+ if (IsArgument(MI)) {
+ MI->removeFromParent();
+ Entry.insert(Entry.begin(), MI);
+ }
+ }
+
+ // Ok, we're now ready to run LiveIntervalAnalysis again.
+ MF.getProperties().set(MachineFunctionProperties::Property::TracksLiveness);
+
+ return Changed;
+}
WebAssemblyFunctionInfo &MFI = *MF.getInfo<WebAssemblyFunctionInfo>();
MachineRegisterInfo &MRI = MF.getRegInfo();
- const MachineFrameInfo &FrameInfo = *MF.getFrameInfo();
MFI.initWARegs();
case WebAssembly::ARGUMENT_I32:
case WebAssembly::ARGUMENT_I64:
case WebAssembly::ARGUMENT_F32:
- case WebAssembly::ARGUMENT_F64:
+ case WebAssembly::ARGUMENT_F64: {
+ int64_t Imm = MI.getOperand(1).getImm();
DEBUG(dbgs() << "Arg VReg " << MI.getOperand(0).getReg() << " -> WAReg "
- << MI.getOperand(1).getImm() << "\n");
- MFI.setWAReg(MI.getOperand(0).getReg(), MI.getOperand(1).getImm());
+ << Imm << "\n");
+ MFI.setWAReg(MI.getOperand(0).getReg(), Imm);
break;
+ }
default:
break;
}
MFI.setWAReg(VReg, CurReg++);
}
}
- // Allocate locals for used physical registers
- bool HasFP =
- MF.getSubtarget<WebAssemblySubtarget>().getFrameLowering()->hasFP(MF);
- if (FrameInfo.getStackSize() > 0 || FrameInfo.adjustsStack() || HasFP) {
- DEBUG(dbgs() << "PReg SP " << CurReg << "\n");
- MFI.addPReg(WebAssembly::SP32, CurReg++);
- }
- if (HasFP) {
- DEBUG(dbgs() << "PReg FP " << CurReg << "\n");
- MFI.addPReg(WebAssembly::FP32, CurReg++);
- }
return true;
}
continue;
if (TargetRegisterInfo::isPhysicalRegister(Reg)) {
+ // Ignore ARGUMENTS; it's just used to keep the ARGUMENT_* instructions
+ // from moving down, and we've already checked for that.
+ if (Reg == WebAssembly::ARGUMENTS)
+ continue;
// If the physical register is never modified, ignore it.
if (!MRI.isPhysRegModified(Reg))
continue;
}
// Ask LiveIntervals whether moving this virtual register use or def to
- // Insert will change value numbers are seen.
+ // Insert will change which value numbers are seen.
const LiveInterval &LI = LIS.getInterval(Reg);
VNInfo *DefVNI =
MO.isDef() ? LI.getVNInfoAt(LIS.getInstructionIndex(*Def).getRegSlot())
static bool OneUseDominatesOtherUses(unsigned Reg, const MachineOperand &OneUse,
const MachineBasicBlock &MBB,
const MachineRegisterInfo &MRI,
- const MachineDominatorTree &MDT) {
+ const MachineDominatorTree &MDT,
+ LiveIntervals &LIS) {
+ const LiveInterval &LI = LIS.getInterval(Reg);
+
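+ // Record which value number of Reg is seen by OneUse. Uses of other value
+ // numbers are unrelated and don't need to be dominated by OneUse.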
+ const MachineInstr *OneUseInst = OneUse.getParent();
+ VNInfo *OneUseVNI = LI.getVNInfoBefore(LIS.getInstructionIndex(*OneUseInst));
+
for (const MachineOperand &Use : MRI.use_operands(Reg)) {
if (&Use == &OneUse)
continue;
+
const MachineInstr *UseInst = Use.getParent();
+ VNInfo *UseVNI = LI.getVNInfoBefore(LIS.getInstructionIndex(*UseInst));
+
+ if (UseVNI != OneUseVNI)
+ continue;
+
- const MachineInstr *OneUseInst = OneUse.getParent();
if (UseInst->getOpcode() == TargetOpcode::PHI) {
// Test that the PHI use, which happens on the CFG edge rather than
/// A single-use def in the same block with no intervening memory or register
/// dependencies; move the def down and nest it with the current instruction.
-static MachineInstr *MoveForSingleUse(unsigned Reg, MachineInstr *Def,
+static MachineInstr *MoveForSingleUse(unsigned Reg, MachineOperand &Op,
+ MachineInstr *Def,
MachineBasicBlock &MBB,
MachineInstr *Insert, LiveIntervals &LIS,
- WebAssemblyFunctionInfo &MFI) {
+ WebAssemblyFunctionInfo &MFI,
+ MachineRegisterInfo &MRI) {
MBB.splice(Insert, &MBB, Def);
LIS.handleMove(*Def);
- MFI.stackifyVReg(Reg);
+
+ if (MRI.hasOneDef(Reg)) {
+ MFI.stackifyVReg(Reg);
+ } else {
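+ // The register has other defs, so it can't be stackified as a whole;
+ // rename this def to a fresh register so that just this value can be.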
+ unsigned NewReg = MRI.createVirtualRegister(MRI.getRegClass(Reg));
+ Def->getOperand(0).setReg(NewReg);
+ Op.setReg(NewReg);
+
+ // Tell LiveIntervals about the new register.
+ LIS.createAndComputeVirtRegInterval(NewReg);
+
+ // Tell LiveIntervals about the changes to the old register.
+ LiveInterval &LI = LIS.getInterval(Reg);
+ LIS.removeVRegDefAt(LI, LIS.getInstructionIndex(*Def).getRegSlot());
+ LIS.shrinkToUses(&LI);
+
+ MFI.stackifyVReg(NewReg);
+ }
+
ImposeStackOrdering(Def);
return Def;
}
MFI.stackifyVReg(NewReg);
ImposeStackOrdering(Clone);
+ // Shrink the interval.
+ bool IsDead = MRI.use_empty(Reg);
+ if (!IsDead) {
+ LiveInterval &LI = LIS.getInterval(Reg);
+ LIS.shrinkToUses(&LI);
+ IsDead = !LI.liveAt(LIS.getInstructionIndex(*Def).getDeadSlot());
+ }
+
// If that was the last use of the original, delete the original.
- // Otherwise shrink the LiveInterval.
- if (MRI.use_empty(Reg)) {
+ if (IsDead) {
SlotIndex Idx = LIS.getInstructionIndex(*Def).getRegSlot();
LIS.removePhysRegDefAt(WebAssembly::ARGUMENTS, Idx);
- LIS.removeVRegDefAt(LIS.getInterval(Reg), Idx);
LIS.removeInterval(Reg);
LIS.RemoveMachineInstrFromMaps(*Def);
Def->eraseFromParent();
- } else {
- LIS.shrinkToUses(&LIS.getInterval(Reg));
}
+
return Clone;
}
bool CanMove = SameBlock && IsSafeToMove(Def, Insert, AA, LIS, MRI) &&
!TreeWalker.IsOnStack(Reg);
if (CanMove && MRI.hasOneUse(Reg)) {
- Insert = MoveForSingleUse(Reg, Def, MBB, Insert, LIS, MFI);
+ Insert = MoveForSingleUse(Reg, Op, Def, MBB, Insert, LIS, MFI, MRI);
} else if (Def->isAsCheapAsAMove() &&
TII->isTriviallyReMaterializable(Def, &AA)) {
Insert = RematerializeCheapDef(Reg, Op, Def, MBB, Insert, LIS, MFI,
MRI, TII, TRI);
} else if (CanMove &&
- OneUseDominatesOtherUses(Reg, Op, MBB, MRI, MDT)) {
+ OneUseDominatesOtherUses(Reg, Op, MBB, MRI, MDT, LIS)) {
Insert = MoveAndTeeForMultiUse(Reg, Op, Def, MBB, Insert, LIS, MFI,
MRI, TII);
} else {
SmallVector<unsigned, 0> Stack;
for (MachineBasicBlock &MBB : MF) {
for (MachineInstr &MI : MBB) {
+ if (MI.isDebugValue())
+ continue;
for (MachineOperand &MO : reverse(MI.explicit_operands())) {
if (!MO.isReg())
continue;
unsigned Reg = MO.getReg();
- // Don't stackify physregs like SP or FP.
- if (!TargetRegisterInfo::isVirtualRegister(Reg))
- continue;
-
if (MFI.isVRegStackified(Reg)) {
if (MO.isDef())
Stack.push_back(Reg);
return Reserved;
}
-static bool isStackifiedVReg(const WebAssemblyFunctionInfo *WFI,
- const MachineOperand& Op) {
- if (Op.isReg()) {
- unsigned Reg = Op.getReg();
- return TargetRegisterInfo::isVirtualRegister(Reg) &&
- WFI->isVRegStackified(Reg);
- }
- return false;
-}
-
-static bool canStackifyOperand(const MachineInstr& Inst) {
- unsigned Op = Inst.getOpcode();
- return Op != TargetOpcode::PHI &&
- Op != TargetOpcode::INLINEASM &&
- Op != TargetOpcode::DBG_VALUE;
-}
-
-// Determine if the FI sequence can be stackified, and if so, where the code can
-// be inserted. If stackification is possible, returns true and ajusts II to
-// point to the insertion point.
-bool findInsertPt(const WebAssemblyFunctionInfo *WFI, MachineBasicBlock &MBB,
- unsigned OperandNum, MachineBasicBlock::iterator &II) {
- if (!canStackifyOperand(*II)) return false;
-
- MachineBasicBlock::iterator InsertPt(II);
- int StackCount = 0;
- // Operands are popped in reverse order, so any operands after FIOperand
- // impose a constraint
- for (unsigned i = OperandNum; i < II->getNumOperands(); i++) {
- if (isStackifiedVReg(WFI, II->getOperand(i))) ++StackCount;
- }
- // Walk backwards, tracking stack depth. When it reaches 0 we have reached the
- // top of the subtree.
- while (StackCount) {
- if (InsertPt == MBB.begin()) return false;
- --InsertPt;
- for (const auto &def : InsertPt->defs())
- if (isStackifiedVReg(WFI, def)) --StackCount;
- for (const auto &use : InsertPt->explicit_uses())
- if (isStackifiedVReg(WFI, use)) ++StackCount;
- }
- II = InsertPt;
- return true;
-}
-
void WebAssemblyRegisterInfo::eliminateFrameIndex(
MachineBasicBlock::iterator II, int SPAdj, unsigned FIOperandNum,
RegScavenger * /*RS*/) const {
MachineBasicBlock &MBB = *MI.getParent();
MachineFunction &MF = *MBB.getParent();
+ MachineRegisterInfo &MRI = MF.getRegInfo();
int FrameIndex = MI.getOperand(FIOperandNum).getIndex();
const MachineFrameInfo &MFI = *MF.getFrameInfo();
int64_t FrameOffset = MFI.getStackSize() + MFI.getObjectOffset(FrameIndex);
+ // If this is the address operand of a load or store, make it relative to SP
+ // and fold the frame offset directly in.
if (MI.mayLoadOrStore() && FIOperandNum == WebAssembly::MemOpAddressOperandNo) {
- // If this is the address operand of a load or store, make it relative to SP
- // and fold the frame offset directly in.
assert(FrameOffset >= 0 && MI.getOperand(1).getImm() >= 0);
int64_t Offset = MI.getOperand(1).getImm() + FrameOffset;
- if (static_cast<uint64_t>(Offset) > std::numeric_limits<uint32_t>::max()) {
- // If this happens the program is invalid, but better to error here than
- // generate broken code.
- report_fatal_error("Memory offset field overflow");
+ if (static_cast<uint64_t>(Offset) <= std::numeric_limits<uint32_t>::max()) {
+ MI.getOperand(FIOperandNum - 1).setImm(Offset);
+ MI.getOperand(FIOperandNum)
+ .ChangeToRegister(WebAssembly::SP32, /*IsDef=*/false);
+ return;
}
- MI.getOperand(FIOperandNum - 1).setImm(Offset);
- MI.getOperand(FIOperandNum)
- .ChangeToRegister(WebAssembly::SP32, /*IsDef=*/false);
- } else {
- // Otherwise calculate the address
- auto &MRI = MF.getRegInfo();
- const auto *TII = MF.getSubtarget<WebAssemblySubtarget>().getInstrInfo();
-
- unsigned FIRegOperand = WebAssembly::SP32;
- if (FrameOffset) {
- // Create i32.add SP, offset and make it the operand. We want to stackify
- // this sequence, but we need to preserve the LIFO expr stack ordering
- // (i.e. we can't insert our code in between MI and any operands it
- // pops before FIOperand).
- auto *WFI = MF.getInfo<WebAssemblyFunctionInfo>();
- bool CanStackifyFI = findInsertPt(WFI, MBB, FIOperandNum, II);
+ }
- unsigned OffsetOp = MRI.createVirtualRegister(&WebAssembly::I32RegClass);
- BuildMI(MBB, *II, II->getDebugLoc(), TII->get(WebAssembly::CONST_I32),
- OffsetOp)
- .addImm(FrameOffset);
- if (CanStackifyFI) {
- WFI->stackifyVReg(OffsetOp);
- FIRegOperand = MRI.createVirtualRegister(&WebAssembly::I32RegClass);
- WFI->stackifyVReg(FIRegOperand);
- } else {
- FIRegOperand = OffsetOp;
+ // If this is an address being added to a constant, fold the frame offset
+ // into the constant.
+ if (MI.getOpcode() == WebAssembly::ADD_I32) {
+ MachineOperand &OtherMO = MI.getOperand(3 - FIOperandNum);
+ if (OtherMO.isReg()) {
+ unsigned OtherMOReg = OtherMO.getReg();
+ if (TargetRegisterInfo::isVirtualRegister(OtherMOReg)) {
+ MachineInstr *Def = MRI.getUniqueVRegDef(OtherMOReg);
+ // TODO: For now we just opportunistically do this in the case where
+ // the CONST_I32 happens to have exactly one def and one use. We
+ // should generalize this to optimize in more cases.
+ if (Def && Def->getOpcode() == WebAssembly::CONST_I32 &&
+ MRI.hasOneNonDBGUse(Def->getOperand(0).getReg())) {
+ MachineOperand &ImmMO = Def->getOperand(1);
+ ImmMO.setImm(ImmMO.getImm() + uint32_t(FrameOffset));
+ MI.getOperand(FIOperandNum)
+ .ChangeToRegister(WebAssembly::SP32, /*IsDef=*/false);
+ return;
+ }
}
- BuildMI(MBB, *II, II->getDebugLoc(), TII->get(WebAssembly::ADD_I32),
- FIRegOperand)
- .addReg(WebAssembly::SP32)
- .addReg(OffsetOp);
}
- MI.getOperand(FIOperandNum).ChangeToRegister(FIRegOperand, /*IsDef=*/false);
}
+
+ // Otherwise create an i32.add SP, offset and make it the operand.
+ const auto *TII = MF.getSubtarget<WebAssemblySubtarget>().getInstrInfo();
+
+ unsigned FIRegOperand = WebAssembly::SP32;
+ if (FrameOffset) {
+ const TargetRegisterClass *PtrRC =
+ MRI.getTargetRegisterInfo()->getPointerRegClass(MF);
+ unsigned OffsetOp = MRI.createVirtualRegister(PtrRC);
+ BuildMI(MBB, *II, II->getDebugLoc(), TII->get(WebAssembly::CONST_I32),
+ OffsetOp)
+ .addImm(FrameOffset);
+ FIRegOperand = MRI.createVirtualRegister(PtrRC);
+ BuildMI(MBB, *II, II->getDebugLoc(), TII->get(WebAssembly::ADD_I32),
+ FIRegOperand)
+ .addReg(WebAssembly::SP32)
+ .addReg(OffsetOp);
+ }
+ MI.getOperand(FIOperandNum).ChangeToRegister(FIRegOperand, /*IsDef=*/false);
}
unsigned
--- /dev/null
+//===-- WebAssemblyReplacePhysRegs.cpp - Replace phys regs with virt regs -===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// \brief This file implements a pass that replaces physical registers with
+/// virtual registers.
+///
+/// LLVM expects certain physical registers, such as a stack pointer. However,
+/// WebAssembly doesn't actually have such physical registers. This pass is run
+/// once LLVM no longer needs these registers, and replaces them with virtual
+/// registers, so they can participate in register stackifying and coloring in
+/// the normal way.
+///
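+/// For example (illustrative MIR; the virtual register number is made up):
+///
+///   %SP32 = ADD_I32 %SP32, ...   becomes   %vreg7 = ADD_I32 %vreg7, ...
+///
+/// Implicit operands are left unchanged.
+///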
+//===----------------------------------------------------------------------===//
+
+#include "WebAssembly.h"
+#include "MCTargetDesc/WebAssemblyMCTargetDesc.h"
+#include "WebAssemblyMachineFunctionInfo.h"
+#include "WebAssemblySubtarget.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+using namespace llvm;
+
+#define DEBUG_TYPE "wasm-replace-phys-regs"
+
+namespace {
+class WebAssemblyReplacePhysRegs final : public MachineFunctionPass {
+public:
+ static char ID; // Pass identification, replacement for typeid
+ WebAssemblyReplacePhysRegs() : MachineFunctionPass(ID) {}
+
+private:
+ const char *getPassName() const override {
+ return "WebAssembly Replace Physical Registers";
+ }
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.setPreservesCFG();
+ MachineFunctionPass::getAnalysisUsage(AU);
+ }
+
+ bool runOnMachineFunction(MachineFunction &MF) override;
+};
+} // end anonymous namespace
+
+char WebAssemblyReplacePhysRegs::ID = 0;
+FunctionPass *llvm::createWebAssemblyReplacePhysRegs() {
+ return new WebAssemblyReplacePhysRegs();
+}
+
+bool WebAssemblyReplacePhysRegs::runOnMachineFunction(MachineFunction &MF) {
+ DEBUG({
+ dbgs() << "********** Replace Physical Registers **********\n"
+ << "********** Function: " << MF.getName() << '\n';
+ });
+
+ MachineRegisterInfo &MRI = MF.getRegInfo();
+ const auto &TRI = *MF.getSubtarget<WebAssemblySubtarget>().getRegisterInfo();
+ bool Changed = false;
+
+ assert(!mustPreserveAnalysisID(LiveIntervalsID) &&
+ "LiveIntervals shouldn't be active yet!");
+ // We don't preserve SSA or liveness.
+ MRI.leaveSSA();
+ MRI.invalidateLiveness();
+
+ for (unsigned PReg = WebAssembly::NoRegister + 1;
+ PReg < WebAssembly::NUM_TARGET_REGS; ++PReg) {
+ // Skip fake registers that are never used explicitly.
+ if (PReg == WebAssembly::EXPR_STACK || PReg == WebAssembly::ARGUMENTS)
+ continue;
+
+ // Replace explicit uses of the physical register with a virtual register.
+ const TargetRegisterClass *RC = TRI.getMinimalPhysRegClass(PReg);
+ unsigned VReg = WebAssembly::NoRegister;
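+ // Create at most one replacement virtual register per physical register,
+ // lazily, on the first explicit operand seen.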
+ for (auto I = MRI.reg_begin(PReg), E = MRI.reg_end(); I != E; ) {
+ MachineOperand &MO = *I++;
+ if (!MO.isImplicit()) {
+ if (VReg == WebAssembly::NoRegister)
+ VReg = MRI.createVirtualRegister(RC);
+ MO.setReg(VReg);
+ Changed = true;
+ }
+ }
+ }
+
+ return Changed;
+}
#include "WebAssemblyMachineFunctionInfo.h"
#include "WebAssemblySubtarget.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
+#include "llvm/CodeGen/LiveIntervalAnalysis.h"
#include "llvm/CodeGen/MachineBlockFrequencyInfo.h"
#include "llvm/CodeGen/MachineDominators.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
AU.addPreserved<MachineBlockFrequencyInfo>();
AU.addRequired<MachineDominatorTree>();
AU.addPreserved<MachineDominatorTree>();
+ AU.addRequired<LiveIntervals>();
+ AU.addPreserved<SlotIndexes>();
+ AU.addPreserved<LiveIntervals>();
AU.addRequired<TargetLibraryInfoWrapperPass>();
MachineFunctionPass::getAnalysisUsage(AU);
}
static bool ReplaceDominatedUses(MachineBasicBlock &MBB, MachineInstr &MI,
unsigned FromReg, unsigned ToReg,
const MachineRegisterInfo &MRI,
- MachineDominatorTree &MDT) {
+ MachineDominatorTree &MDT,
+ LiveIntervals &LIS) {
bool Changed = false;
+
+ LiveInterval *FromLI = &LIS.getInterval(FromReg);
+ LiveInterval *ToLI = &LIS.getInterval(ToReg);
+
+ SlotIndex FromIdx = LIS.getInstructionIndex(MI).getRegSlot();
+ VNInfo *FromVNI = FromLI->getVNInfoAt(FromIdx);
+
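+ // Collect the slot indices of rewritten uses so that ToReg's interval can
+ // be extended to cover them once all uses are processed.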
+ SmallVector<SlotIndex, 4> Indices;
+
for (auto I = MRI.use_begin(FromReg), E = MRI.use_end(); I != E;) {
MachineOperand &O = *I++;
MachineInstr *Where = O.getParent();
- if (Where->getOpcode() == TargetOpcode::PHI) {
- // PHIs use their operands on their incoming CFG edges rather than
- // in their parent blocks. Get the basic block paired with this use
- // of FromReg and check that MI's block dominates it.
- MachineBasicBlock *Pred =
- Where->getOperand(&O - &Where->getOperand(0) + 1).getMBB();
- if (!MDT.dominates(&MBB, Pred))
- continue;
- } else {
- // For a non-PHI, check that MI dominates the instruction in the
- // normal way.
- if (&MI == Where || !MDT.dominates(&MI, Where))
- continue;
- }
+
+ // Check that MI dominates the instruction in the normal way.
+ if (&MI == Where || !MDT.dominates(&MI, Where))
+ continue;
+
+ // If this use gets a different value, skip it.
+ SlotIndex WhereIdx = LIS.getInstructionIndex(*Where);
+ VNInfo *WhereVNI = FromLI->getVNInfoAt(WhereIdx);
+ if (WhereVNI && WhereVNI != FromVNI)
+ continue;
+
+ // Make sure ToReg isn't clobbered before it gets there.
+ VNInfo *ToVNI = ToLI->getVNInfoAt(WhereIdx);
+ if (ToVNI && ToVNI != FromVNI)
+ continue;
+
Changed = true;
DEBUG(dbgs() << "Setting operand " << O << " in " << *Where << " from "
<< MI << "\n");
O.setReg(ToReg);
- // If the store's def was previously dead, it is no longer. But the
- // dead flag shouldn't be set yet.
- assert(!MI.getOperand(0).isDead() && "Unexpected dead flag");
+
+ // If the store's def was previously dead, it is no longer.
+ MI.getOperand(0).setIsDead(false);
+
+ Indices.push_back(WhereIdx.getRegSlot());
+ }
+
+ if (Changed) {
+ // Extend ToReg's liveness.
+ LIS.extendToIndices(*ToLI, Indices);
+
+ // Shrink FromReg's liveness.
+ LIS.shrinkToUses(FromLI);
+
+ // If we replaced all dominated uses, FromReg is now killed at MI.
+ if (!FromLI->liveAt(FromIdx.getDeadSlot()))
+ MI.addRegisterKilled(FromReg,
+ MBB.getParent()->getSubtarget<WebAssemblySubtarget>()
+ .getRegisterInfo());
}
+
return Changed;
}
static bool optimizeStore(MachineBasicBlock &MBB, MachineInstr &MI,
const MachineRegisterInfo &MRI,
- MachineDominatorTree &MDT) {
- const auto &Stored = MI.getOperand(WebAssembly::StoreValueOperandNo);
- switch (Stored.getType()) {
- case MachineOperand::MO_Register: {
- unsigned ToReg = MI.getOperand(0).getReg();
- unsigned FromReg = Stored.getReg();
- return ReplaceDominatedUses(MBB, MI, FromReg, ToReg, MRI, MDT);
- }
- case MachineOperand::MO_FrameIndex:
- // TODO: optimize.
- return false;
- default:
- report_fatal_error("Store results: store not consuming reg or frame index");
- }
+ MachineDominatorTree &MDT,
+ LiveIntervals &LIS) {
+ unsigned ToReg = MI.getOperand(0).getReg();
+ unsigned FromReg = MI.getOperand(WebAssembly::StoreValueOperandNo).getReg();
+ return ReplaceDominatedUses(MBB, MI, FromReg, ToReg, MRI, MDT, LIS);
}
static bool optimizeCall(MachineBasicBlock &MBB, MachineInstr &MI,
const MachineRegisterInfo &MRI,
MachineDominatorTree &MDT,
+ LiveIntervals &LIS,
const WebAssemblyTargetLowering &TLI,
const TargetLibraryInfo &LibInfo) {
MachineOperand &Op1 = MI.getOperand(1);
if (!LibInfo.getLibFunc(Name, Func))
return false;
- const auto &Op2 = MI.getOperand(2);
- switch (Op2.getType()) {
- case MachineOperand::MO_Register: {
- unsigned FromReg = Op2.getReg();
- unsigned ToReg = MI.getOperand(0).getReg();
- if (MRI.getRegClass(FromReg) != MRI.getRegClass(ToReg))
- report_fatal_error("Store results: call to builtin function with wrong "
- "signature, from/to mismatch");
- return ReplaceDominatedUses(MBB, MI, FromReg, ToReg, MRI, MDT);
- }
- case MachineOperand::MO_FrameIndex:
- // TODO: optimize.
- return false;
- default:
+ unsigned FromReg = MI.getOperand(2).getReg();
+ unsigned ToReg = MI.getOperand(0).getReg();
+ if (MRI.getRegClass(FromReg) != MRI.getRegClass(ToReg))
report_fatal_error("Store results: call to builtin function with wrong "
- "signature, not consuming reg or frame index");
- }
+ "signature, from/to mismatch");
+ return ReplaceDominatedUses(MBB, MI, FromReg, ToReg, MRI, MDT, LIS);
}
bool WebAssemblyStoreResults::runOnMachineFunction(MachineFunction &MF) {
<< "********** Function: " << MF.getName() << '\n';
});
- const MachineRegisterInfo &MRI = MF.getRegInfo();
+ MachineRegisterInfo &MRI = MF.getRegInfo();
MachineDominatorTree &MDT = getAnalysis<MachineDominatorTree>();
const WebAssemblyTargetLowering &TLI =
*MF.getSubtarget<WebAssemblySubtarget>().getTargetLowering();
const auto &LibInfo = getAnalysis<TargetLibraryInfoWrapperPass>().getTLI();
+ LiveIntervals &LIS = getAnalysis<LiveIntervals>();
bool Changed = false;
- assert(MRI.isSSA() && "StoreResults depends on SSA form");
+ // We don't preserve SSA form.
+ MRI.leaveSSA();
+
+ assert(MRI.tracksLiveness() && "StoreResults expects liveness tracking");
for (auto &MBB : MF) {
DEBUG(dbgs() << "Basic Block: " << MBB.getName() << '\n');
case WebAssembly::STORE_F64:
case WebAssembly::STORE_I32:
case WebAssembly::STORE_I64:
- Changed |= optimizeStore(MBB, MI, MRI, MDT);
+ Changed |= optimizeStore(MBB, MI, MRI, MDT, LIS);
break;
case WebAssembly::CALL_I32:
case WebAssembly::CALL_I64:
- Changed |= optimizeCall(MBB, MI, MRI, MDT, TLI, LibInfo);
+ Changed |= optimizeCall(MBB, MI, MRI, MDT, LIS, TLI, LibInfo);
break;
}
}
void addIRPasses() override;
bool addInstSelector() override;
- bool addILPOpts() override;
- void addPreRegAlloc() override;
void addPostRegAlloc() override;
bool addGCPasses() override { return false; }
void addPreEmitPass() override;
return false;
}
-bool WebAssemblyPassConfig::addILPOpts() {
- (void)TargetPassConfig::addILPOpts();
- return true;
-}
-
-void WebAssemblyPassConfig::addPreRegAlloc() {
- TargetPassConfig::addPreRegAlloc();
-
- // Prepare store instructions for register stackifying.
- if (getOptLevel() != CodeGenOpt::None)
- addPass(createWebAssemblyStoreResults());
-}
-
void WebAssemblyPassConfig::addPostRegAlloc() {
// TODO: The following CodeGen passes don't currently support code containing
// virtual registers. Consider removing their restrictions and re-enabling
disablePass(&LiveDebugValuesID);
disablePass(&PatchableFunctionID);
- if (getOptLevel() != CodeGenOpt::None) {
- // Mark registers as representing wasm's expression stack.
- addPass(createWebAssemblyRegStackify());
-
- // Run the register coloring pass to reduce the total number of registers.
- addPass(createWebAssemblyRegColoring());
- }
-
TargetPassConfig::addPostRegAlloc();
// Run WebAssembly's version of the PrologEpilogInserter. Target-independent
void WebAssemblyPassConfig::addPreEmitPass() {
TargetPassConfig::addPreEmitPass();
+ // Now that we have a prologue and epilogue and all frame indices are
+ // rewritten, eliminate SP and FP. This allows them to be stackified,
+ // colored, and numbered with the rest of the registers.
+ addPass(createWebAssemblyReplacePhysRegs());
+
+ if (getOptLevel() != CodeGenOpt::None) {
+ // LiveIntervals isn't commonly run this late. Re-establish preconditions.
+ addPass(createWebAssemblyPrepareForLiveIntervals());
+
+ // Depend on LiveIntervals and perform some optimizations on it.
+ addPass(createWebAssemblyOptimizeLiveIntervals());
+
+ // Prepare store instructions for register stackifying.
+ addPass(createWebAssemblyStoreResults());
+
+ // Mark registers as representing wasm's expression stack. This is a key
+ // code-compression technique in WebAssembly. We run this pass (and
+ // StoreResults above) very late, so that it sees as much code as possible,
+ // including code emitted by PEI and expanded by late tail duplication.
+ addPass(createWebAssemblyRegStackify());
+
+ // Run the register coloring pass to reduce the total number of registers.
+ // This runs after stackification so that it doesn't consider registers
+ // that become stackified.
+ addPass(createWebAssemblyRegColoring());
+ }
+
// Eliminate multiple-entry loops.
addPass(createWebAssemblyFixIrreducibleControlFlow());
; CHECK-LABEL: byval_arg
define void @byval_arg(%SmallStruct* %ptr) {
; CHECK: .param i32
+ ; CHECK: i32.const $push[[L4:.+]]=, __stack_pointer
; Subtract 16 from SP (SP is 16-byte aligned)
; CHECK: i32.const $push[[L1:.+]]=, __stack_pointer
; CHECK-NEXT: i32.load $push[[L2:.+]]=, 0($pop[[L1]])
; CHECK-NEXT: i32.const $push[[L3:.+]]=, 16
- ; CHECK-NEXT: i32.sub [[SP:.+]]=, $pop[[L2]], $pop[[L3]]
+ ; CHECK-NEXT: i32.sub $push[[L10:.+]]=, $pop[[L2]], $pop[[L3]]
; Ensure SP is stored back before the call
- ; CHECK-NEXT: i32.const $push[[L4:.+]]=, __stack_pointer
- ; CHECK-NEXT: i32.store {{.*}}=, 0($pop[[L4]]), [[SP]]
+ ; CHECK-NEXT: i32.store $push[[L12:.+]]=, 0($pop[[L4]]), $pop[[L10]]{{$}}
+ ; CHECK-NEXT: tee_local $push[[L11:.+]]=, $[[SP:.+]]=, $pop[[L12]]{{$}}
; Copy the SmallStruct argument to the stack (SP+12, original SP-4)
- ; CHECK-NEXT: i32.load $push[[L4:.+]]=, 0($0)
- ; CHECK-NEXT: i32.store {{.*}}=, 12([[SP]]), $pop[[L4]]
+ ; CHECK-NEXT: i32.load $push[[L0:.+]]=, 0($0)
+ ; CHECK-NEXT: i32.store $discard=, 12($pop[[L11]]), $pop[[L0]]
; Pass a pointer to the stack slot to the function
- ; CHECK-NEXT: i32.const $push[[L5:.+]]=, 12
- ; CHECK-NEXT: i32.add $push[[ARG:.+]]=, [[SP]], $pop[[L5]]
- ; CHECK-NEXT: call ext_byval_func@FUNCTION, $pop[[ARG]]
+ ; CHECK-NEXT: i32.const $push[[L5:.+]]=, 12{{$}}
+ ; CHECK-NEXT: i32.add $push[[ARG:.+]]=, $[[SP]], $pop[[L5]]{{$}}
+ ; CHECK-NEXT: call ext_byval_func@FUNCTION, $pop[[ARG]]{{$}}
call void @ext_byval_func(%SmallStruct* byval %ptr)
; Restore the stack
; CHECK-NEXT: i32.const $push[[L7:.+]]=, __stack_pointer
; CHECK-NEXT: i32.const $push[[L6:.+]]=, 16
- ; CHECK-NEXT: i32.add $push[[L8:.+]]=, [[SP]], $pop[[L6]]
+ ; CHECK-NEXT: i32.add $push[[L8:.+]]=, $[[SP]], $pop[[L6]]
; CHECK-NEXT: i32.store {{.*}}=, 0($pop[[L7]]), $pop[[L8]]
; CHECK-NEXT: return
ret void
; CHECK: .param i32
; Don't check the entire SP sequence, just enough to get the alignment.
; CHECK: i32.const $push[[L1:.+]]=, 16
- ; CHECK-NEXT: i32.sub [[SP:.+]]=, {{.+}}, $pop[[L1]]
+ ; CHECK-NEXT: i32.sub $push[[L10:.+]]=, {{.+}}, $pop[[L1]]
+ ; CHECK-NEXT: i32.store $push[[L12:.+]]=, 0($pop{{.+}}), $pop[[L10]]{{$}}
+ ; CHECK-NEXT: tee_local $push[[L11:.+]]=, $[[SP:.+]]=, $pop[[L12]]{{$}}
; Copy the SmallStruct argument to the stack (SP+8, original SP-8)
- ; CHECK: i32.load $push[[L4:.+]]=, 0($0){{$}}
- ; CHECK-NEXT: i32.store {{.*}}=, 8([[SP]]), $pop[[L4]]{{$}}
+ ; CHECK-NEXT: i32.load $push[[L0:.+]]=, 0($0){{$}}
+ ; CHECK-NEXT: i32.store $discard=, 8($pop[[L11]]), $pop[[L0]]{{$}}
; Pass a pointer to the stack slot to the function
- ; CHECK-NEXT: i32.const $push[[L5:.+]]=, 8
- ; CHECK-NEXT: i32.add $push[[ARG:.+]]=, [[SP]], $pop[[L5]]
- ; CHECK-NEXT: call ext_byval_func_align8@FUNCTION, $pop[[ARG]]
+ ; CHECK-NEXT: i32.const $push[[L5:.+]]=, 8{{$}}
+ ; CHECK-NEXT: i32.add $push[[ARG:.+]]=, $[[SP]], $pop[[L5]]{{$}}
+ ; CHECK-NEXT: call ext_byval_func_align8@FUNCTION, $pop[[ARG]]{{$}}
call void @ext_byval_func_align8(%SmallStruct* byval align 8 %ptr)
ret void
}
; CHECK: .param i32
; Subtract 16 from SP (SP is 16-byte aligned)
; CHECK: i32.const $push[[L1:.+]]=, 16
- ; CHECK-NEXT: i32.sub [[SP:.+]]=, {{.+}}, $pop[[L1]]
+ ; CHECK-NEXT: i32.sub $push[[L12:.+]]=, {{.+}}, $pop[[L1]]
+ ; CHECK-NEXT: i32.store $push[[L15:.+]]=, {{.+}}, $pop[[L12]]
+ ; CHECK-NEXT: tee_local $push[[L14:.+]]=, $[[SP:.+]]=, $pop[[L15]]
; Copy the AlignedStruct argument to the stack (SP+0, original SP-16)
; Just check the last load/store pair of the memcpy
; CHECK: i64.load $push[[L4:.+]]=, 0($0)
- ; CHECK-NEXT: i64.store {{.*}}=, 0([[SP]]), $pop[[L4]]
+ ; CHECK-NEXT: i64.store $discard=, 0($[[SP]]), $pop[[L4]]
; Pass a pointer to the stack slot to the function
- ; CHECK-NEXT: call ext_byval_func_alignedstruct@FUNCTION, [[SP]]
+ ; CHECK-NEXT: call ext_byval_func_alignedstruct@FUNCTION, $[[SP]]
tail call void @ext_byval_func_alignedstruct(%AlignedStruct* byval %ptr)
ret void
}
; Call memcpy for "big" byvals.
; CHECK-LABEL: big_byval:
+; CHECK: i32.const $push[[L4:.+]]=, __stack_pointer
; CHECK: i32.const $push[[L1:.+]]=, __stack_pointer
; CHECK-NEXT: i32.load $push[[L2:.+]]=, 0($pop[[L1]])
; CHECK-NEXT: i32.const $push[[L3:.+]]=, 131072
-; CHECK-NEXT: i32.sub [[SP:.+]]=, $pop[[L2]], $pop[[L3]]
-; CHECK: i32.call ${{[^,]+}}=, memcpy@FUNCTION,
+; CHECK-NEXT: i32.sub $push[[L8:.+]]=, $pop[[L2]], $pop[[L3]]
+; CHECK-NEXT: i32.store $push[[L12:.+]]=, 0($pop[[L4]]), $pop[[L8]]{{$}}
+; CHECK-NEXT: i32.const $push[[L0:.+]]=, 131072
+; CHECK-NEXT: i32.call $push[[L11:.+]]=, memcpy@FUNCTION, $pop{{.+}}, ${{.+}}, $pop{{.+}}
+; CHECK-NEXT: tee_local $push[[L9:.+]]=, $[[SP:.+]]=, $pop[[L11]]{{$}}
; CHECK-NEXT: call big_byval_callee@FUNCTION,
%big = type [131072 x i8]
declare void @big_byval_callee(%big* byval align 1)
; CHECK-NOT: local
; CHECK: block{{$}}
; CHECK: br_if 0, {{[^,]+}}{{$}}
-; CHECK: .LBB2_1:
+; CHECK: .LBB2_{{[0-9]+}}:
; CHECK: br_if 0, ${{[0-9]+}}{{$}}
-; CHECK: .LBB2_2:
+; CHECK: .LBB2_{{[0-9]+}}:
; CHECK: return{{$}}
; OPT-LABEL: test2:
; OPT-NOT: local
; OPT: block{{$}}
; OPT: br_if 0, {{[^,]+}}{{$}}
-; OPT: .LBB2_1:
+; OPT: .LBB2_{{[0-9]+}}:
; OPT: br_if 0, ${{[0-9]+}}{{$}}
-; OPT: .LBB2_2:
+; OPT: .LBB2_{{[0-9]+}}:
; OPT: return{{$}}
define void @test2(double* nocapture %p, i32 %n) {
entry:
; CHECK: .LBB11_1:
; CHECK: loop{{$}}
; CHECK: block{{$}}
-; CHECK: block{{$}}
; CHECK: br_if 0, $0{{$}}
; CHECK: br 1{{$}}
; CHECK: .LBB11_3:
+; CHECK: end_block{{$}}
; CHECK: block{{$}}
; CHECK: br_if 0, $1{{$}}
; CHECK: br 1{{$}}
; CHECK: .LBB11_5:
-; CHECK: .LBB11_6:
; CHECK: br 0{{$}}
-; CHECK: .LBB11_7:
+; CHECK: .LBB11_6:
; CHECK-NEXT: end_loop{{$}}
; OPT-LABEL: doublediamond_in_a_loop:
; OPT: .LBB11_1:
; OPT: loop{{$}}
; OPT: block{{$}}
-; OPT-NEXT: block{{$}}
-; OPT-NEXT: block{{$}}
; OPT: br_if 0, {{[^,]+}}{{$}}
-; OPT: br_if 1, {{[^,]+}}{{$}}
+; OPT: block{{$}}
+; OPT: br_if 0, {{[^,]+}}{{$}}
; OPT: br 2{{$}}
; OPT-NEXT: .LBB11_4:
; OPT-NEXT: end_block{{$}}
; OPT: br 1{{$}}
; OPT: .LBB11_5:
; OPT-NEXT: end_block{{$}}
-; OPT: .LBB11_6:
-; OPT-NEXT: end_block{{$}}
; OPT: br 0{{$}}
-; OPT: .LBB11_7:
+; OPT: .LBB11_6:
; OPT-NEXT: end_loop{{$}}
define i32 @doublediamond_in_a_loop(i32 %a, i32 %b, i32* %p) {
entry:
; CHECK-LABEL: test8:
; CHECK: .LBB17_1:
; CHECK-NEXT: loop{{$}}
-; CHECK-NEXT: block{{$}}
-; CHECK-NOT: block
-; CHECK: br_if 0, {{[^,]+}}{{$}}
-; CHECK-NOT: block
-; CHECK: br_if 1, {{[^,]+}}{{$}}
-; CHECK-NEXT: .LBB17_3:
-; CHECK-NEXT: end_block{{$}}
-; CHECK-NEXT: loop{{$}}
; CHECK-NEXT: i32.const $push{{[^,]+}}, 0{{$}}
; CHECK-NEXT: br_if 0, {{[^,]+}}{{$}}
-; CHECK-NEXT: br 2{{$}}
-; CHECK-NEXT: .LBB17_4:
+; CHECK-NEXT: br 0{{$}}
+; CHECK-NEXT: .LBB17_2:
+; CHECK-NEXT: end_loop{{$}}
; OPT-LABEL: test8:
; OPT: .LBB17_1:
; OPT-NEXT: loop{{$}}
-; OPT-NEXT: block{{$}}
-; OPT-NOT: block
-; OPT: br_if 0, {{[^,]+}}{{$}}
-; OPT-NOT: block
-; OPT: br_if 1, {{[^,]+}}{{$}}
-; OPT-NEXT: .LBB17_3:
-; OPT-NEXT: end_block{{$}}
-; OPT-NEXT: loop{{$}}
; OPT-NEXT: i32.const $push{{[^,]+}}, 0{{$}}
; OPT-NEXT: br_if 0, {{[^,]+}}{{$}}
-; OPT-NEXT: br 2{{$}}
-; OPT-NEXT: .LBB17_4:
+; OPT-NEXT: br 0{{$}}
+; OPT-NEXT: .LBB17_2:
+; OPT-NEXT: end_loop{{$}}
define i32 @test8() {
bb:
br label %bb1
; CHECK-NEXT: loop{{$}}
; CHECK-NEXT: i32.const $push0=, 0{{$}}
; CHECK-NEXT: br_if 0, $pop0{{$}}
-; CHECK-NEXT: .LBB23_2:{{$}}
; CHECK-NEXT: end_loop{{$}}
+; CHECK-NEXT: .LBB23_3:{{$}}
; CHECK-NEXT: loop{{$}}
-; CHECK-NEXT: i32.const $discard=, 0{{$}}
; CHECK-NEXT: i32.const $push1=, 0{{$}}
; CHECK-NEXT: br_if 0, $pop1{{$}}
; CHECK-NEXT: end_loop{{$}}
; CHECK-LABEL: frame_index:
-; CHECK: i32.call $discard=, memset@FUNCTION, $pop12, $pop1, $pop0{{$}}
-; CHECK: i32.call $discard=, memset@FUNCTION, $0, $pop3, $pop2{{$}}
+; CHECK: i32.call $discard=, memset@FUNCTION, $pop{{[0-9]+}}, $pop{{[0-9]+}}, $pop{{[0-9]+}}{{$}}
+; CHECK: i32.call $push{{[0-9]+}}=, memset@FUNCTION, ${{[0-9]+}}, $pop{{[0-9]+}}, $pop{{[0-9]+}}{{$}}
; CHECK: return{{$}}
define void @frame_index() {
entry:
}
; If the result value of memset doesn't get stackified, it should be marked
-; $discard.
+; $discard. Note that we use a call to prevent tail dup so that we can test
+; this specific functionality.
; CHECK-LABEL: discard_result:
; CHECK: i32.call $discard=, memset@FUNCTION, $0, $1, $2
declare i8* @def()
+declare void @block_tail_dup()
define i8* @discard_result(i8* %arg, i8 %arg1, i32 %arg2, i32 %arg3, i32 %arg4) {
bb:
%tmp = icmp eq i32 %arg3, 0
%tmp10 = call i8* @def()
br label %bb11
+bb11:
+ %tmp12 = phi i8* [ %arg, %bb7 ], [ %arg, %bb8 ], [ %tmp10, %bb9 ]
+ call void @block_tail_dup()
+ ret i8* %tmp12
+}
+
+; This is the same as discard_result, except we let tail dup happen, so the
+; result of the memset *is* stackified.
+
+; CHECK-LABEL: tail_dup_to_reuse_result:
+; CHECK: i32.call $push{{[0-9]+}}=, memset@FUNCTION, $0, $1, $2
+define i8* @tail_dup_to_reuse_result(i8* %arg, i8 %arg1, i32 %arg2, i32 %arg3, i32 %arg4) {
+bb:
+ %tmp = icmp eq i32 %arg3, 0
+ br i1 %tmp, label %bb5, label %bb9
+
+bb5:
+ %tmp6 = icmp eq i32 %arg4, 0
+ br i1 %tmp6, label %bb7, label %bb8
+
+bb7:
+ call void @llvm.memset.p0i8.i32(i8* %arg, i8 %arg1, i32 %arg2, i32 1, i1 false)
+ br label %bb11
+
+bb8:
+ br label %bb11
+
+bb9:
+ %tmp10 = call i8* @def()
+ br label %bb11
+
bb11:
%tmp12 = phi i8* [ %arg, %bb7 ], [ %arg, %bb8 ], [ %tmp10, %bb9 ]
ret i8* %tmp12
; merged into i64 stores.
; CHECK-LABEL: aggregate_return:
-; CHECK: i64.const $push0=, 0{{$}}
-; CHECK: i64.store $push1=, 8($0):p2align=2, $pop0{{$}}
-; CHECK: i64.store $discard=, 0($0):p2align=2, $pop1{{$}}
+; CHECK: i64.const $push[[L0:[0-9]+]]=, 0{{$}}
+; CHECK: i64.store $push[[L1:[0-9]+]]=, 8($0):p2align=2, $pop[[L0]]{{$}}
+; CHECK: i64.store $discard=, 0($0):p2align=2, $pop[[L1]]{{$}}
define {i32,i32,i32,i32} @aggregate_return() {
ret {i32,i32,i32,i32} zeroinitializer
}
; merged.
; CHECK-LABEL: aggregate_return_without_merge:
-; CHECK: i32.const $push0=, 0{{$}}
-; CHECK: i32.store8 $push1=, 14($0), $pop0{{$}}
-; CHECK: i32.store16 $push2=, 12($0), $pop1{{$}}
-; CHECK: i32.store $discard=, 8($0), $pop2{{$}}
-; CHECK: i64.const $push3=, 0{{$}}
-; CHECK: i64.store $discard=, 0($0), $pop3{{$}}
+; CHECK: i32.const $push[[L0:[0-9]+]]=, 0{{$}}
+; CHECK: i32.store8 $push[[L1:[0-9]+]]=, 14($0), $pop[[L0]]{{$}}
+; CHECK: i32.store16 $push[[L2:[0-9]+]]=, 12($0), $pop[[L1]]{{$}}
+; CHECK: i32.store $discard=, 8($0), $pop[[L2]]{{$}}
+; CHECK: i64.const $push[[L3:[0-9]+]]=, 0{{$}}
+; CHECK: i64.store $discard=, 0($0), $pop[[L3]]{{$}}
define {i64,i32,i16,i8} @aggregate_return_without_merge() {
ret {i64,i32,i16,i8} zeroinitializer
}
; CHECK-LABEL: simple_multiple_use:
; CHECK: .param i32, i32{{$}}
; CHECK-NEXT: i32.mul $push[[NUM0:[0-9]+]]=, $1, $0{{$}}
-; CHECK-NEXT: tee_local $push[[NUM1:[0-9]+]]=, $0=, $pop[[NUM0]]{{$}}
+; CHECK-NEXT: tee_local $push[[NUM1:[0-9]+]]=, $[[NUM2:[0-9]+]]=, $pop[[NUM0]]{{$}}
; CHECK-NEXT: call use_a@FUNCTION, $pop[[NUM1]]{{$}}
-; CHECK-NEXT: call use_b@FUNCTION, $0{{$}}
+; CHECK-NEXT: call use_b@FUNCTION, $[[NUM2]]{{$}}
; CHECK-NEXT: return{{$}}
declare void @use_a(i32)
declare void @use_b(i32)
; CHECK-LABEL: multiple_uses_in_same_insn:
; CHECK: .param i32, i32{{$}}
; CHECK-NEXT: i32.mul $push[[NUM0:[0-9]+]]=, $1, $0{{$}}
-; CHECK-NEXT: tee_local $push[[NUM1:[0-9]+]]=, $0=, $pop[[NUM0]]{{$}}
-; CHECK-NEXT: call use_2@FUNCTION, $pop[[NUM1]], $0{{$}}
+; CHECK-NEXT: tee_local $push[[NUM1:[0-9]+]]=, $[[NUM2:[0-9]+]]=, $pop[[NUM0]]{{$}}
+; CHECK-NEXT: call use_2@FUNCTION, $pop[[NUM1]], $[[NUM2]]{{$}}
; CHECK-NEXT: return{{$}}
declare void @use_2(i32, i32)
define void @multiple_uses_in_same_insn(i32 %x, i32 %y) {
; CHECK-NEXT: tee_local $push[[NUM1:[0-9]+]]=, $[[NUM2:[0-9]+]]=, $pop[[NUM0]]{{$}}
; CHECK-NEXT: f64.select $push{{[0-9]+}}=, $pop{{[0-9]+}}, $pop[[NUM1]], ${{[0-9]+}}{{$}}
; CHECK: $[[NUM2]]=,
-; CHECK: $[[NUM2]]=,
define void @multiple_defs(i32 %arg, i32 %arg1, i1 %arg2, i1 %arg3, i1 %arg4) {
bb:
br label %bb5
; Don't move stores past loads if there may be aliasing
; CHECK-LABEL: no_stackify_store_past_load
-; CHECK: i32.store {{.*}}, 0($1), $0
+; CHECK: i32.store $[[L0:[0-9]+]]=, 0($1), $0
; CHECK: i32.load {{.*}}, 0($2)
-; CHECK: i32.call {{.*}}, callee@FUNCTION, $0
+; CHECK: i32.call {{.*}}, callee@FUNCTION, $[[L0]]{{$}}
define i32 @no_stackify_store_past_load(i32 %a, i32* %p1, i32* %p2) {
store i32 %a, i32* %p1
%b = load i32, i32* %p2, align 4
}
; CHECK-LABEL: fi_ret:
-; CHECK: i32.store $discard=,
+; CHECK: i32.store $push0=,
+; CHECK: return $pop0{{$}}
define hidden i8* @fi_ret(i8** %addr) {
entry:
%buf = alloca [27 x i8], align 16
; Check that there is an extra local for the stack pointer.
; CHECK: .local i32{{$}}
define void @alloca32() noredzone {
+ ; CHECK: i32.const $push[[L4:.+]]=, __stack_pointer{{$}}
; CHECK: i32.const $push[[L1:.+]]=, __stack_pointer{{$}}
; CHECK-NEXT: i32.load $push[[L2:.+]]=, 0($pop[[L1]])
; CHECK-NEXT: i32.const $push[[L3:.+]]=, 16
- ; CHECK-NEXT: i32.sub [[SP:.+]]=, $pop[[L2]], $pop[[L3]]
- ; CHECK-NEXT: i32.const $push[[L4:.+]]=, __stack_pointer{{$}}
- ; CHECK-NEXT: i32.store $discard=, 0($pop[[L4]]), [[SP]]
+ ; CHECK-NEXT: i32.sub $push[[L8:.+]]=, $pop[[L2]], $pop[[L3]]
+ ; CHECK-NEXT: i32.store $push[[L10:.+]]=, 0($pop[[L4]]), $pop[[L8]]{{$}}
+ ; CHECK-NEXT: tee_local $push[[L9:.+]]=, $[[SP:.+]]=, $pop[[L10]]{{$}}
%retval = alloca i32
; CHECK: i32.const $push[[L0:.+]]=, 0
- ; CHECK: i32.store {{.*}}=, 12([[SP]]), $pop[[L0]]
+ ; CHECK: i32.store {{.*}}=, 12($pop[[L9]]), $pop[[L0]]
store i32 0, i32* %retval
; CHECK: i32.const $push[[L6:.+]]=, __stack_pointer
; CHECK-NEXT: i32.const $push[[L5:.+]]=, 16
- ; CHECK-NEXT: i32.add $push[[L7:.+]]=, [[SP]], $pop[[L5]]
+ ; CHECK-NEXT: i32.add $push[[L7:.+]]=, $[[SP]], $pop[[L5]]
; CHECK-NEXT: i32.store $discard=, 0($pop[[L6]]), $pop[[L7]]
ret void
}
; CHECK-LABEL: alloca3264:
; CHECK: .local i32{{$}}
define void @alloca3264() {
- ; CHECK: i32.const $push[[L1:.+]]=, __stack_pointer
- ; CHECK-NEXT: i32.load $push[[L2:.+]]=, 0($pop[[L1]])
- ; CHECK-NEXT: i32.const $push[[L3:.+]]=, 16
- ; CHECK-NEXT: i32.sub [[SP:.+]]=, $pop[[L2]], $pop[[L3]]
+ ; CHECK: i32.const $push[[L2:.+]]=, __stack_pointer
+ ; CHECK-NEXT: i32.load $push[[L3:.+]]=, 0($pop[[L2]])
+ ; CHECK-NEXT: i32.const $push[[L4:.+]]=, 16
+ ; CHECK-NEXT: i32.sub $push[[L6:.+]]=, $pop[[L3]], $pop[[L4]]
+ ; CHECK-NEXT: tee_local $push[[L5:.+]]=, $[[SP:.+]]=, $pop[[L6]]
%r1 = alloca i32
%r2 = alloca double
- ; CHECK-NEXT: i32.const $push[[L3:.+]]=, 0
- ; CHECK-NEXT: i32.store {{.*}}=, 12([[SP]]), $pop[[L3]]
+ ; CHECK-NEXT: i32.const $push[[L0:.+]]=, 0
+ ; CHECK-NEXT: i32.store $discard=, 12($pop[[L5]]), $pop[[L0]]
store i32 0, i32* %r1
- ; CHECK-NEXT: i64.const $push[[L0:.+]]=, 0
- ; CHECK-NEXT: i64.store {{.*}}=, 0([[SP]]), $pop[[L0]]
+ ; CHECK-NEXT: i64.const $push[[L1:.+]]=, 0
+ ; CHECK-NEXT: i64.store $discard=, 0($[[SP]]), $pop[[L1]]
store double 0.0, double* %r2
; CHECK-NEXT: return
ret void
; CHECK-LABEL: allocarray:
; CHECK: .local i32{{$}}
define void @allocarray() {
- ; CHECK: i32.const $push[[L1:.+]]=, __stack_pointer
- ; CHECK-NEXT: i32.load $push[[L2:.+]]=, 0($pop[[L1]])
- ; CHECK-NEXT: i32.const $push[[L3:.+]]=, 144{{$}}
- ; CHECK-NEXT: i32.sub [[SP:.+]]=, $pop[[L2]], $pop[[L3]]
- ; CHECK-NEXT: i32.const $push[[L4:.+]]=, __stack_pointer{{$}}
- ; CHECK-NEXT: i32.store $discard=, 0($pop[[L4]]), [[SP]]
+ ; CHECK: i32.const $push[[L7:.+]]=, __stack_pointer
+ ; CHECK: i32.const $push[[L4:.+]]=, __stack_pointer
+ ; CHECK-NEXT: i32.load $push[[L5:.+]]=, 0($pop[[L4]])
+ ; CHECK-NEXT: i32.const $push[[L6:.+]]=, 144{{$}}
+ ; CHECK-NEXT: i32.sub $push[[L11:.+]]=, $pop[[L5]], $pop[[L6]]
+ ; CHECK-NEXT: i32.store $[[SP:.+]]=, 0($pop[[L7]]), $pop[[L11]]
%r = alloca [33 x i32]
- ; CHECK-NEXT: i32.const $push[[L5:.+]]=, 12
- ; CHECK-NEXT: i32.add $push[[L7:.+]]=, [[SP]], $pop[[L5]]
- ; CHECK-NEXT: i32.const $push[[L4:.+]]=, 12
- ; CHECK-NEXT: i32.add $push[[L6:.+]]=, $pop[[L7]], $pop[[L4]]
- ; CHECK-NEXT: i32.const $push[[L9:.+]]=, 1{{$}}
- ; CHECK-NEXT: i32.store $push[[L10:.+]]=, 12([[SP]]), $pop[[L9]]{{$}}
- ; CHECK-NEXT: i32.store $discard=, 0($pop3), $pop[[L10]]{{$}}
+ ; CHECK-NEXT: i32.const $push[[L2:.+]]=, 24
+ ; CHECK-NEXT: i32.add $push[[L3:.+]]=, $[[SP]], $pop[[L2]]
+ ; CHECK-NEXT: i32.const $push[[L1:.+]]=, 1{{$}}
+ ; CHECK-NEXT: i32.store $push[[L0:.+]]=, 0($pop[[L3]]), $pop[[L1]]{{$}}
+ ; CHECK-NEXT: i32.store $discard=, 12($[[SP]]), $pop[[L0]]{{$}}
%p = getelementptr [33 x i32], [33 x i32]* %r, i32 0, i32 0
store i32 1, i32* %p
%p2 = getelementptr [33 x i32], [33 x i32]* %r, i32 0, i32 3
store i32 1, i32* %p2
- ; CHECK: i32.const $push[[L12:.+]]=, __stack_pointer
- ; CHECK-NEXT: i32.const $push[[L11:.+]]=, 144
- ; CHECK-NEXT: i32.add $push[[L13:.+]]=, [[SP]], $pop[[L11]]
- ; CHECK-NEXT: i32.store $discard=, 0($pop[[L12]]), $pop[[L13]]
+ ; CHECK: i32.const $push[[L10:.+]]=, __stack_pointer
+ ; CHECK-NEXT: i32.const $push[[L8:.+]]=, 144
+ ; CHECK-NEXT: i32.add $push[[L9:.+]]=, $[[SP]], $pop[[L8]]
+ ; CHECK-NEXT: i32.store $discard=, 0($pop[[L10]]), $pop[[L9]]
ret void
}
; CHECK-LABEL: non_mem_use
define void @non_mem_use(i8** %addr) {
; CHECK: i32.const $push[[L1:.+]]=, 48
- ; CHECK-NEXT: i32.sub [[SP:.+]]=, {{.+}}, $pop[[L1]]
+ ; CHECK-NEXT: i32.sub $push[[L11:.+]]=, {{.+}}, $pop[[L1]]
+ ; CHECK-NEXT: i32.store $[[SP:.+]]=, {{.+}}, $pop[[L11]]
%buf = alloca [27 x i8], align 16
%r = alloca i64
%r2 = alloca i64
; %r is at SP+8
+ ; CHECK: tee_local $push[[L12:.+]]=, $[[SP:.+]]=, $pop{{.+}}
; CHECK: i32.const $push[[OFF:.+]]=, 8
- ; CHECK-NEXT: i32.add $push[[ARG1:.+]]=, [[SP]], $pop[[OFF]]
+ ; CHECK-NEXT: i32.add $push[[ARG1:.+]]=, $pop[[L12]], $pop[[OFF]]
; CHECK-NEXT: call ext_func@FUNCTION, $pop[[ARG1]]
call void @ext_func(i64* %r)
; %r2 is at SP+0, no add needed
- ; CHECK-NEXT: call ext_func@FUNCTION, [[SP]]
+ ; CHECK-NEXT: call ext_func@FUNCTION, $[[SP]]
call void @ext_func(i64* %r2)
; Use as a value, but in a store
; %buf is at SP+16
; CHECK: i32.const $push[[OFF:.+]]=, 16
- ; CHECK-NEXT: i32.add $push[[VAL:.+]]=, [[SP]], $pop[[OFF]]
+ ; CHECK-NEXT: i32.add $push[[VAL:.+]]=, $[[SP]], $pop[[OFF]]
; CHECK-NEXT: i32.store {{.*}}=, 0($0), $pop[[VAL]]
%gep = getelementptr inbounds [27 x i8], [27 x i8]* %buf, i32 0, i32 0
store i8* %gep, i8** %addr
; CHECK-LABEL: allocarray_inbounds:
; CHECK: .local i32{{$}}
define void @allocarray_inbounds() {
- ; CHECK: i32.const $push[[L1:.+]]=, __stack_pointer
- ; CHECK-NEXT: i32.load $push[[L2:.+]]=, 0($pop[[L1]])
- ; CHECK-NEXT: i32.const $push[[L3:.+]]=, 32{{$}}
- ; CHECK-NEXT: i32.sub [[SP:.+]]=, $pop[[L2]], $pop[[L3]]
+ ; CHECK: i32.const $push[[L6:.+]]=, __stack_pointer
+ ; CHECK: i32.const $push[[L3:.+]]=, __stack_pointer
+ ; CHECK-NEXT: i32.load $push[[L4:.+]]=, 0($pop[[L3]])
+ ; CHECK-NEXT: i32.const $push[[L5:.+]]=, 32{{$}}
+ ; CHECK-NEXT: i32.sub $push[[L10:.+]]=, $pop[[L4]], $pop[[L5]]
+ ; CHECK-NEXT: i32.store $[[SP:.+]]=, 0($pop[[L6]]), $pop[[L10]]{{$}}
%r = alloca [5 x i32]
; CHECK: i32.const $push[[L3:.+]]=, 1
- ; CHECK: i32.store {{.*}}=, 12([[SP]]), $pop[[L3]]
+ ; CHECK: i32.store {{.*}}=, 12($[[SP]]), $pop[[L3]]
%p = getelementptr inbounds [5 x i32], [5 x i32]* %r, i32 0, i32 0
store i32 1, i32* %p
; This store should have both the GEP and the FI folded into it.
- ; CHECK-NEXT: i32.store {{.*}}=, 24([[SP]]), $pop
+ ; CHECK-NEXT: i32.store {{.*}}=, 24($[[SP]]), $pop
%p2 = getelementptr inbounds [5 x i32], [5 x i32]* %r, i32 0, i32 3
store i32 1, i32* %p2
call void @ext_func(i64* null);
; CHECK: i32.const $push[[L6:.+]]=, __stack_pointer
; CHECK-NEXT: i32.const $push[[L5:.+]]=, 32
- ; CHECK-NEXT: i32.add $push[[L7:.+]]=, [[SP]], $pop[[L5]]
+ ; CHECK-NEXT: i32.add $push[[L7:.+]]=, $[[SP]], $pop[[L5]]
; CHECK-NEXT: i32.store $discard=, 0($pop[[L6]]), $pop[[L7]]
ret void
}
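The 24($[[SP]]) immediate is the folding the comment calls out, and the arithmetic checks out: the frame index places %r at SP+12 inside the 32-byte frame, and the inbounds GEP to element 3 adds 3 * 4 bytes, so both constants collapse into the store's offset field. As a quick sanity check:

// Worked offsets behind the two stores (i32 is 4 bytes on wasm32).
constexpr unsigned FrameIndexOffset = 12; // %r within the 32-byte frame
constexpr unsigned GepOffset = 3 * 4;     // element 3 of [5 x i32]
static_assert(FrameIndexOffset + GepOffset == 24,
              "matches the 24($[[SP]]) check above");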
; CHECK-LABEL: dynamic_alloca:
define void @dynamic_alloca(i32 %alloc) {
; CHECK: i32.const $push[[L1:.+]]=, __stack_pointer
- ; CHECK-NEXT: i32.load [[SP:.+]]=, 0($pop[[L1]])
- ; CHECK-NEXT: copy_local [[FP:.+]]=, [[SP]]
+ ; CHECK-NEXT: i32.load $push[[L13:.+]]=, 0($pop[[L1]])
+ ; CHECK-NEXT: tee_local $push[[L12:.+]]=, [[SP:.+]], $pop[[L13]]{{$}}
+ ; CHECK-NEXT: copy_local [[FP:.+]]=, $pop[[L12]]{{$}}
; Target independent codegen bumps the stack pointer.
; CHECK: i32.sub
- ; CHECK-NEXT: copy_local [[SP]]=,
; Check that SP is written back to memory after decrement
- ; CHECK-NEXT: i32.const $push[[L4:.+]]=, __stack_pointer{{$}}
- ; CHECK-NEXT: i32.store $discard=, 0($pop[[L4]]), [[SP]]
+ ; CHECK: i32.store $discard=, 0($pop{{.+}}),
%r = alloca i32, i32 %alloc
; Target-independent codegen also calculates the store addr
; CHECK: call ext_func_i32@FUNCTION
; CHECK-LABEL: dynamic_alloca_redzone:
define void @dynamic_alloca_redzone(i32 %alloc) {
- ; CHECK: i32.const $push[[L1:.+]]=, __stack_pointer
- ; CHECK-NEXT: i32.load [[SP:.+]]=, 0($pop[[L1]])
- ; CHECK-NEXT: copy_local [[FP:.+]]=, [[SP]]
+ ; CHECK: i32.const $push[[L8:.+]]=, __stack_pointer
+ ; CHECK-NEXT: i32.load $push[[L13:.+]]=, 0($pop[[L8]])
+ ; CHECK-NEXT: tee_local $push[[L12:.+]]=, [[SP:.+]], $pop[[L13]]{{$}}
+ ; CHECK-NEXT: copy_local [[FP:.+]]=, $pop[[L12]]{{$}}
; Target independent codegen bumps the stack pointer
- ; CHECK: i32.sub [[R:.+]]=,
- ; CHECK-NEXT: copy_local [[SP]]=, [[R]]
+ ; CHECK: i32.sub
%r = alloca i32, i32 %alloc
- ; check-next here asserts that SP is not written back.
- ; CHECK-NEXT: i32.const $push[[ZERO:.+]]=, 0
- ; CHECK-NEXT: i32.store $discard=, 0([[R]]), $pop[[ZERO]]
+ ; CHECK-NEXT: tee_local $push[[L8:.+]]=, $0=, $pop
+ ; CHECK-NEXT: copy_local $discard=, $pop[[L8]]{{$}}
+ ; CHECK-NEXT: i32.const $push[[L6:.+]]=, 0{{$}}
+ ; CHECK-NEXT: i32.store $discard=, 0($0), $pop[[L6]]{{$}}
store i32 0, i32* %r
; CHECK-NEXT: return
ret void
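Unlike the noredzone functions in this file, dynamic_alloca_redzone makes no calls, so the decremented stack pointer is never written back to __stack_pointer; nothing can run that would observe or clobber the red zone below the cached value. A hypothetical predicate capturing the shape of that rule (the backend's real check lives in frame lowering and may weigh more factors):

// Sketch only: writeback can be skipped when no callee could observe
// or clobber memory below the stack pointer.
struct FrameFacts {
  bool HasCalls;      // a callee could run while below-SP memory is live
  bool NoRedZoneAttr; // the function opted out via noredzone
};

static bool needsSPWriteback(const FrameFacts &F) {
  return F.HasCalls || F.NoRedZoneAttr;
}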
; CHECK-LABEL: dynamic_static_alloca:
define void @dynamic_static_alloca(i32 %alloc) noredzone {
; Decrement SP in the prolog by the static amount and writeback to memory.
- ; CHECK: i32.const $push[[L1:.+]]=, __stack_pointer
- ; CHECK-NEXT: i32.load $push[[L2:.+]]=, 0($pop[[L1]])
- ; CHECK-NEXT: i32.const $push[[L3:.+]]=, 16
- ; CHECK-NEXT: i32.sub [[SP:.+]]=, $pop[[L2]], $pop[[L3]]
- ; CHECK-NEXT: copy_local [[FP:.+]]=, [[SP]]
- ; CHECK-NEXT: i32.const $push[[L4:.+]]=, __stack_pointer
- ; CHECK-NEXT: i32.store {{.*}}=, 0($pop[[L4]]), [[SP]]
+ ; CHECK: i32.const $push[[L9:.+]]=, __stack_pointer
+ ; CHECK-NEXT: i32.load $push[[L10:.+]]=, 0($pop[[L9]])
+ ; CHECK-NEXT: i32.const $push[[L11:.+]]=, 16
+ ; CHECK-NEXT: i32.sub $push[[L20:.+]]=, $pop[[L10]], $pop[[L11]]
+ ; CHECK-NEXT: tee_local $push[[L19:.+]]=, $[[FP:.+]]=, $pop[[L20]]
+ ; CHECK: i32.store $push[[L0:.+]]=, 0($pop{{.+}}), $[[FP]]
; Decrement SP in the body by the dynamic amount.
; CHECK: i32.sub
- ; CHECK: copy_local [[SP]]=,
; Writeback to memory.
- ; CHECK-NEXT: i32.const $push[[L4:.+]]=, __stack_pointer
- ; CHECK-NEXT: i32.store {{.*}}=, 0($pop[[L4]]), [[SP]]
+ ; CHECK: i32.store $discard=, 0($pop{{.+}}), $pop{{.+}}
%r1 = alloca i32
%r = alloca i32, i32 %alloc
store i32 0, i32* %r
- ; CHECK: i32.const $push[[L6:.+]]=, __stack_pointer
- ; CHECK-NEXT: i32.const $push[[L5:.+]]=, 16
- ; CHECK-NEXT: i32.add $push[[L7:.+]]=, [[FP]], $pop[[L5]]
- ; CHECK-NEXT: i32.store $discard=, 0($pop[[L6]]), $pop[[L7]]
+ ; CHECK: i32.store $discard=, 0($pop{{.+}}), $pop{{.+}}
ret void
}
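Putting the pieces together for dynamic_static_alloca: the prologue carves out the static 16-byte area and tees it into FP, the body lowers SP again by the dynamic amount, __stack_pointer is written back after each decrement because the function is noredzone, and the epilogue restores FP + 16. An illustrative C++ model, names hypothetical:

#include <cstdint>

extern "C" std::uintptr_t __stack_pointer_cell;

void dynamic_static_model(std::uintptr_t alloc) {
  std::uintptr_t fp = __stack_pointer_cell - 16;   // static frame, teed into FP
  __stack_pointer_cell = fp;                       // prologue writeback
  std::uintptr_t sp = fp - ((alloc * 4 + 15) & ~std::uintptr_t(15)); // dynamic part
  __stack_pointer_cell = sp;                       // writeback after decrement
  *reinterpret_cast<std::int32_t *>(sp) = 0;       // store i32 0, i32* %r
  __stack_pointer_cell = fp + 16;                  // epilogue restores old SP
}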
define void @copytoreg_fi(i1 %cond, i32* %b) {
entry:
; CHECK: i32.const $push[[L1:.+]]=, 16
- ; CHECK-NEXT: i32.sub [[SP:.+]]=, {{.+}}, $pop[[L1]]
+ ; CHECK-NEXT: i32.sub $push[[L3:.+]]=, {{.+}}, $pop[[L1]]
%addr = alloca i32
; CHECK: i32.const $push[[OFF:.+]]=, 12
- ; CHECK-NEXT: i32.add $push[[ADDR:.+]]=, [[SP]], $pop[[OFF]]
+ ; CHECK-NEXT: i32.add $push[[ADDR:.+]]=, $pop[[L3]], $pop[[OFF]]
; CHECK-NEXT: copy_local [[COPY:.+]]=, $pop[[ADDR]]
br label %body
body:
; Test __builtin_frame_address(0).
; CHECK-LABEL: frameaddress_0:
-; CHECK: i32.const $push[[L1:.+]]=, __stack_pointer
-; CHECK-NEXT: i32.load [[SP:.+]]=, 0($pop[[L1]])
-; CHECK-NEXT: copy_local [[FP:.+]]=, [[SP]]
-; CHECK-NEXT: call use_i8_star@FUNCTION, [[FP]]
-; CHEC K-NEXT: i32.const $push[[L6:.+]]=, __stack_pointer
-; CHEC K-NEXT: i32.store [[SP]]=, 0($pop[[L6]]), [[FP]]
+; CHECK: i32.const $push[[L0:.+]]=, __stack_pointer
+; CHECK-NEXT: i32.load $push[[L3:.+]]=, 0($pop[[L0]])
+; CHECK-NEXT: copy_local $push[[L4:.+]]=, $pop[[L3]]{{$}}
+; CHECK-NEXT: tee_local $push[[L2:.+]]=, $[[FP:.+]]=, $pop[[L4]]{{$}}
+; CHECK-NEXT: call use_i8_star@FUNCTION, $pop[[L2]]
+; CHECK-NEXT: i32.const $push[[L1:.+]]=, __stack_pointer
+; CHECK-NEXT: i32.store $discard=, 0($pop[[L1]]), $[[FP]]
define void @frameaddress_0() {
%t = call i8* @llvm.frameaddress(i32 0)
call void @use_i8_star(i8* %t)
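These checks trace __builtin_frame_address(0) through the llvm.frameaddress intrinsic, which on WebAssembly yields the function's entry stack-pointer value (the FP that tee_local materializes above). Roughly the C++ source being tested:

extern "C" void use_i8_star(char *);

// Mirrors the test body: take the current frame address and pass it on.
// __builtin_frame_address(0) lowers to llvm.frameaddress(i32 0).
void frameaddress_0() {
  use_i8_star(static_cast<char *>(__builtin_frame_address(0)));
}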