unsigned MemReg, int64_t MemOffset) {
return emitInst(Opc, DstReg).addReg(MemReg).addImm(MemOffset);
}
+
+ unsigned fastEmitInst_rr(unsigned MachineInstOpcode,
+ const TargetRegisterClass *RC,
+ unsigned Op0, bool Op0IsKill,
+ unsigned Op1, bool Op1IsKill);
+
// for some reason, this default is not generated by tablegen
// so we explicitly generate it here.
//
}
}
+unsigned MipsFastISel::fastEmitInst_rr(unsigned MachineInstOpcode,
+ const TargetRegisterClass *RC,
+ unsigned Op0, bool Op0IsKill,
+ unsigned Op1, bool Op1IsKill) {
+ // We treat the MUL instruction in a special way because it clobbers
+ // the HI0 & LO0 registers. The TableGen definition of this instruction can
+ // mark these registers only as implicitly defined. As a result, the
+ // register allocator runs out of registers when this instruction is
+ // followed by another instruction that defines the same registers too.
+ // We can fix this by explicitly marking those registers as dead.
+ if (MachineInstOpcode == Mips::MUL) {
+ unsigned ResultReg = createResultReg(RC);
+ const MCInstrDesc &II = TII.get(MachineInstOpcode);
+ Op0 = constrainOperandRegClass(II, Op0, II.getNumDefs());
+ Op1 = constrainOperandRegClass(II, Op1, II.getNumDefs() + 1);
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II, ResultReg)
+ .addReg(Op0, getKillRegState(Op0IsKill))
+ .addReg(Op1, getKillRegState(Op1IsKill))
+ .addReg(Mips::HI0, RegState::ImplicitDefine | RegState::Dead)
+ .addReg(Mips::LO0, RegState::ImplicitDefine | RegState::Dead);
+ return ResultReg;
+ }
+
+ return FastISel::fastEmitInst_rr(MachineInstOpcode, RC, Op0, Op0IsKill, Op1,
+ Op1IsKill);
+}
+
namespace llvm {
FastISel *Mips::createFastISel(FunctionLoweringInfo &funcInfo,
const TargetLibraryInfo *libInfo) {
--- /dev/null
+; RUN: llc < %s -march=mipsel -mcpu=mips32 -O0 \
+; RUN: -fast-isel -mips-fast-isel -relocation-model=pic
+; RUN: llc < %s -march=mipsel -mcpu=mips32r2 -O0 \
+; RUN: -fast-isel -mips-fast-isel -relocation-model=pic
+
+; The test is just to make sure it is able to allocate
+; registers for this example. There was an issue with allocating AC0
+; after a mul instruction.
+
+declare { i32, i1 } @llvm.smul.with.overflow.i32(i32, i32)
+
+define i32 @foo(i32 %a, i32 %b) {
+entry:
+ %0 = mul i32 %a, %b
+ %1 = call { i32, i1 } @llvm.smul.with.overflow.i32(i32 %0, i32 %b)
+ %2 = extractvalue { i32, i1 } %1, 0
+ ret i32 %2
+}