LLT PartTy, ArrayRef<unsigned> PartRegs,
LLT LeftoverTy = LLT(), ArrayRef<unsigned> LeftoverRegs = {});
+ /// Perform generic multiplication of values held in multiple registers.
+ /// Generated instructions use only types NarrowTy and i1.
+ /// The destination can be the same size as the source or twice its size.
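+ /// For example, with two NarrowTy parts per source and two destination
+ /// parts, the expansion amounts to (illustrative sketch):
+ ///   DstRegs[0] = mul(Src1Regs[0], Src2Regs[0])
+ ///   DstRegs[1] = mul(Src1Regs[1], Src2Regs[0])
+ ///              + mul(Src1Regs[0], Src2Regs[1])
+ ///              + umulh(Src1Regs[0], Src2Regs[0])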
+ void multiplyRegisters(SmallVectorImpl<unsigned> &DstRegs,
+ ArrayRef<unsigned> Src1Regs,
+ ArrayRef<unsigned> Src2Regs, LLT NarrowTy);
+
LegalizeResult fewerElementsVectorImplicitDef(MachineInstr &MI,
unsigned TypeIdx, LLT NarrowTy);
LLT HalfTy, LLT ShiftAmtTy);
LegalizeResult narrowScalarShift(MachineInstr &MI, unsigned TypeIdx, LLT Ty);
- LegalizeResult narrowScalarMul(MachineInstr &MI, unsigned TypeIdx, LLT Ty);
+ LegalizeResult narrowScalarMul(MachineInstr &MI, LLT Ty);
LegalizeResult narrowScalarExtract(MachineInstr &MI, unsigned TypeIdx, LLT Ty);
LegalizeResult narrowScalarInsert(MachineInstr &MI, unsigned TypeIdx, LLT Ty);
MachineInstrBuilder buildPtrMask(unsigned Res, unsigned Op0,
uint32_t NumBits);
+ /// Build and insert \p Res, \p CarryOut = G_UADDO \p Op0, \p Op1
+ ///
+ /// G_UADDO sets \p Res to \p Op0 + \p Op1 (truncated to the bit width) and
+ /// sets \p CarryOut to 1 if the result overflowed in unsigned arithmetic.
+ ///
+ /// \pre setBasicBlock or setMI must have been called.
+ /// \pre \p Res, \p Op0 and \p Op1 must be generic virtual registers with the
+ /// same scalar type.
+ /// \pre \p CarryOut must be a generic virtual register with scalar type
+ /// (typically s1).
+ ///
+ /// \return The newly created instruction.
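+ ///
+ /// Illustrative use when adding wide values split into narrow parts
+ /// (MIRBuilder, NarrowTy, LHSLo and RHSLo are assumed names):
+ ///   auto UAddo = MIRBuilder.buildUAddo(NarrowTy, LLT::scalar(1),
+ ///                                      LHSLo, RHSLo);
+ ///   unsigned Sum = UAddo.getReg(0);
+ ///   unsigned Carry = UAddo.getReg(1);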
+ MachineInstrBuilder buildUAddo(const DstOp &Res, const DstOp &CarryOut,
+ const SrcOp &Op0, const SrcOp &Op1);
+
/// Build and insert \p Res, \p CarryOut = G_UADDE \p Op0,
/// \p Op1, \p CarryIn
///
return Legalized;
}
case TargetOpcode::G_MUL:
- return narrowScalarMul(MI, TypeIdx, NarrowTy);
+ return narrowScalarMul(MI, NarrowTy);
case TargetOpcode::G_EXTRACT:
return narrowScalarExtract(MI, TypeIdx, NarrowTy);
case TargetOpcode::G_INSERT:
}
}
+void LegalizerHelper::multiplyRegisters(SmallVectorImpl<unsigned> &DstRegs,
+ ArrayRef<unsigned> Src1Regs,
+ ArrayRef<unsigned> Src2Regs,
+ LLT NarrowTy) {
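+ // Schoolbook long multiplication: produce one NarrowTy part of the result
+ // per iteration. Part DstIdx sums the low halves of the products
+ // Src1Regs[j] * Src2Regs[i] with i + j == DstIdx, the high halves of the
+ // products with i + j == DstIdx - 1, and the carries accumulated while
+ // computing the previous part.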
+ MachineIRBuilder &B = MIRBuilder;
+ unsigned SrcParts = Src1Regs.size();
+ unsigned DstParts = DstRegs.size();
+
+ unsigned DstIdx = 0; // Low bits of the result.
+ unsigned FactorSum =
+ B.buildMul(NarrowTy, Src1Regs[DstIdx], Src2Regs[DstIdx]).getReg(0);
+ DstRegs[DstIdx] = FactorSum;
+
+ unsigned CarrySumPrevDstIdx;
+ SmallVector<unsigned, 4> Factors;
+
+ for (DstIdx = 1; DstIdx < DstParts; DstIdx++) {
+ // Collect low parts of muls for DstIdx.
+ for (unsigned i = DstIdx + 1 < SrcParts ? 0 : DstIdx - SrcParts + 1;
+ i <= std::min(DstIdx, SrcParts - 1); ++i) {
+ MachineInstrBuilder Mul =
+ B.buildMul(NarrowTy, Src1Regs[DstIdx - i], Src2Regs[i]);
+ Factors.push_back(Mul.getReg(0));
+ }
+ // Collect high parts of muls from previous DstIdx.
+ for (unsigned i = DstIdx < SrcParts ? 0 : DstIdx - SrcParts;
+ i <= std::min(DstIdx - 1, SrcParts - 1); ++i) {
+ MachineInstrBuilder Umulh =
+ B.buildUMulH(NarrowTy, Src1Regs[DstIdx - 1 - i], Src2Regs[i]);
+ Factors.push_back(Umulh.getReg(0));
+ }
+ // Add the CarrySum from the additions calculated for the previous DstIdx.
+ if (DstIdx != 1) {
+ Factors.push_back(CarrySumPrevDstIdx);
+ }
+
+ unsigned CarrySum;
+ // Add all factors and accumulate all carries into CarrySum.
+ if (DstIdx != DstParts - 1) {
+ MachineInstrBuilder Uaddo =
+ B.buildUAddo(NarrowTy, LLT::scalar(1), Factors[0], Factors[1]);
+ FactorSum = Uaddo.getReg(0);
+ CarrySum = B.buildZExt(NarrowTy, Uaddo.getReg(1)).getReg(0);
+ for (unsigned i = 2; i < Factors.size(); ++i) {
+ MachineInstrBuilder Uaddo =
+ B.buildUAddo(NarrowTy, LLT::scalar(1), FactorSum, Factors[i]);
+ FactorSum = Uaddo.getReg(0);
+ MachineInstrBuilder Carry = B.buildZExt(NarrowTy, Uaddo.getReg(1));
+ CarrySum = B.buildAdd(NarrowTy, CarrySum, Carry).getReg(0);
+ }
+ } else {
+ // Since the value for the next index is not calculated, neither is CarrySum.
+ FactorSum = B.buildAdd(NarrowTy, Factors[0], Factors[1]).getReg(0);
+ for (unsigned i = 2; i < Factors.size(); ++i)
+ FactorSum = B.buildAdd(NarrowTy, FactorSum, Factors[i]).getReg(0);
+ }
+
+ // CarrySum is only computed, and only needed, when there is a next index.
+ if (DstIdx != DstParts - 1)
+ CarrySumPrevDstIdx = CarrySum;
+ DstRegs[DstIdx] = FactorSum;
+ Factors.clear();
+ }
+}
+
LegalizerHelper::LegalizeResult
-LegalizerHelper::narrowScalarMul(MachineInstr &MI, unsigned TypeIdx, LLT NewTy) {
+LegalizerHelper::narrowScalarMul(MachineInstr &MI, LLT NarrowTy) {
unsigned DstReg = MI.getOperand(0).getReg();
- unsigned Src0 = MI.getOperand(1).getReg();
- unsigned Src1 = MI.getOperand(2).getReg();
+ unsigned Src1 = MI.getOperand(1).getReg();
+ unsigned Src2 = MI.getOperand(2).getReg();
+
LLT Ty = MRI.getType(DstReg);
if (Ty.isVector())
return UnableToLegalize;
- unsigned Size = Ty.getSizeInBits();
- unsigned NewSize = Size / 2;
- if (Size != 2 * NewSize)
+ unsigned SrcSize = MRI.getType(Src1).getSizeInBits();
+ unsigned DstSize = Ty.getSizeInBits();
+ unsigned NarrowSize = NarrowTy.getSizeInBits();
+ if (DstSize % NarrowSize != 0 || SrcSize % NarrowSize != 0)
return UnableToLegalize;
- LLT HalfTy = LLT::scalar(NewSize);
- // TODO: if HalfTy != NewTy, handle the breakdown all at once?
-
- unsigned ShiftAmt = MRI.createGenericVirtualRegister(Ty);
- unsigned Lo = MRI.createGenericVirtualRegister(HalfTy);
- unsigned Hi = MRI.createGenericVirtualRegister(HalfTy);
- unsigned ExtLo = MRI.createGenericVirtualRegister(Ty);
- unsigned ExtHi = MRI.createGenericVirtualRegister(Ty);
- unsigned ShiftedHi = MRI.createGenericVirtualRegister(Ty);
+ unsigned NumDstParts = DstSize / NarrowSize;
+ unsigned NumSrcParts = SrcSize / NarrowSize;
- SmallVector<unsigned, 2> Src0Parts;
- SmallVector<unsigned, 2> Src1Parts;
+ SmallVector<unsigned, 2> Src1Parts, Src2Parts, DstRegs;
+ extractParts(Src1, NarrowTy, NumSrcParts, Src1Parts);
+ extractParts(Src2, NarrowTy, NumSrcParts, Src2Parts);
+ DstRegs.resize(NumDstParts);
+ multiplyRegisters(DstRegs, Src1Parts, Src2Parts, NarrowTy);
- extractParts(Src0, HalfTy, 2, Src0Parts);
- extractParts(Src1, HalfTy, 2, Src1Parts);
-
- MIRBuilder.buildMul(Lo, Src0Parts[0], Src1Parts[0]);
-
- // TODO: Use smulh or umulh depending on what the target has.
- MIRBuilder.buildUMulH(Hi, Src0Parts[1], Src1Parts[1]);
-
- MIRBuilder.buildConstant(ShiftAmt, NewSize);
- MIRBuilder.buildAnyExt(ExtHi, Hi);
- MIRBuilder.buildShl(ShiftedHi, ExtHi, ShiftAmt);
-
- MIRBuilder.buildZExt(ExtLo, Lo);
- MIRBuilder.buildOr(DstReg, ExtLo, ShiftedHi);
+ MIRBuilder.buildMerge(DstReg, DstRegs);
MI.eraseFromParent();
return Legalized;
}
-
LegalizerHelper::LegalizeResult
LegalizerHelper::narrowScalarExtract(MachineInstr &MI, unsigned TypeIdx,
LLT NarrowTy) {
.addMemOperand(&MMO);
}
+MachineInstrBuilder MachineIRBuilder::buildUAddo(const DstOp &Res,
+ const DstOp &CarryOut,
+ const SrcOp &Op0,
+ const SrcOp &Op1) {
+ return buildInstr(TargetOpcode::G_UADDO, {Res, CarryOut}, {Op0, Op1});
+}
+
MachineInstrBuilder MachineIRBuilder::buildUAdde(const DstOp &Res,
const DstOp &CarryOut,
const SrcOp &Op0,
const LLT s64 = LLT::scalar(64);
const LLT p0 = LLT::pointer(0, 32);
- getActionDefinitionsBuilder({G_ADD, G_SUB})
+ getActionDefinitionsBuilder({G_ADD, G_SUB, G_MUL})
.legalFor({s32})
.clampScalar(0, s32, s32);
- getActionDefinitionsBuilder(G_MUL)
- .legalFor({s32})
- .minScalar(0, s32);
-
getActionDefinitionsBuilder({G_UADDO, G_UADDE, G_USUBO, G_USUBE, G_UMULO})
.lowerFor({{s32, s1}});
; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](s64)
; CHECK: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](s64)
; CHECK: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[UV]], [[UV2]]
- ; CHECK: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[UV1]], [[UV3]]
- ; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 32
- ; CHECK: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[UMULH]](s32)
- ; CHECK: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[C]](s64)
- ; CHECK: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[ANYEXT]], [[TRUNC]](s32)
- ; CHECK: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[MUL]](s32)
- ; CHECK: [[OR:%[0-9]+]]:_(s64) = G_OR [[ZEXT]], [[SHL]]
- ; CHECK: $vgpr0_vgpr1 = COPY [[OR]](s64)
+ ; CHECK: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UV1]], [[UV2]]
+ ; CHECK: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[UV]], [[UV3]]
+ ; CHECK: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[UV]], [[UV2]]
+ ; CHECK: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[MUL1]], [[MUL2]]
+ ; CHECK: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[ADD]], [[UMULH]]
+ ; CHECK: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[MUL]](s32), [[ADD1]](s32)
+ ; CHECK: $vgpr0_vgpr1 = COPY [[MV]](s64)
%0:_(s64) = COPY $vgpr0_vgpr1
%1:_(s64) = COPY $vgpr2_vgpr3
%2:_(s64) = G_MUL %0, %1
define void @mul_i16_sext() {entry: ret void}
define void @mul_i16_zext() {entry: ret void}
define void @mul_i16_aext() {entry: ret void}
+ define void @mul_i64() {entry: ret void}
+ define void @mul_i128() {entry: ret void}
define void @umul_with_overflow(i32 %lhs, i32 %rhs, i32* %pmul, i1* %pcarry_flag) { ret void }
...
$v0 = COPY %5(s32)
RetRA implicit $v0
+...
+---
+name: mul_i64
+alignment: 2
+tracksRegLiveness: true
+body: |
+ bb.1.entry:
+ liveins: $a0, $a1, $a2, $a3
+
+ ; MIPS32-LABEL: name: mul_i64
+ ; MIPS32: liveins: $a0, $a1, $a2, $a3
+ ; MIPS32: [[COPY:%[0-9]+]]:_(s32) = COPY $a0
+ ; MIPS32: [[COPY1:%[0-9]+]]:_(s32) = COPY $a1
+ ; MIPS32: [[COPY2:%[0-9]+]]:_(s32) = COPY $a2
+ ; MIPS32: [[COPY3:%[0-9]+]]:_(s32) = COPY $a3
+ ; MIPS32: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[COPY2]], [[COPY]]
+ ; MIPS32: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[COPY3]], [[COPY]]
+ ; MIPS32: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[COPY2]], [[COPY1]]
+ ; MIPS32: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[COPY2]], [[COPY]]
+ ; MIPS32: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[MUL1]], [[MUL2]]
+ ; MIPS32: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[ADD]], [[UMULH]]
+ ; MIPS32: $v0 = COPY [[MUL]](s32)
+ ; MIPS32: $v1 = COPY [[ADD1]](s32)
+ ; MIPS32: RetRA implicit $v0, implicit $v1
+ %2:_(s32) = COPY $a0
+ %3:_(s32) = COPY $a1
+ %0:_(s64) = G_MERGE_VALUES %2(s32), %3(s32)
+ %4:_(s32) = COPY $a2
+ %5:_(s32) = COPY $a3
+ %1:_(s64) = G_MERGE_VALUES %4(s32), %5(s32)
+ %6:_(s64) = G_MUL %1, %0
+ %7:_(s32), %8:_(s32) = G_UNMERGE_VALUES %6(s64)
+ $v0 = COPY %7(s32)
+ $v1 = COPY %8(s32)
+ RetRA implicit $v0, implicit $v1
+
+...
+---
+name: mul_i128
+alignment: 2
+tracksRegLiveness: true
+fixedStack:
+ - { id: 0, offset: 28, size: 4, alignment: 4, stack-id: 0, isImmutable: true }
+ - { id: 1, offset: 24, size: 4, alignment: 8, stack-id: 0, isImmutable: true }
+ - { id: 2, offset: 20, size: 4, alignment: 4, stack-id: 0, isImmutable: true }
+ - { id: 3, offset: 16, size: 4, alignment: 8, stack-id: 0, isImmutable: true }
+body: |
+ bb.1.entry:
+ liveins: $a0, $a1, $a2, $a3
+
+ ; MIPS32-LABEL: name: mul_i128
+ ; MIPS32: liveins: $a0, $a1, $a2, $a3
+ ; MIPS32: [[COPY:%[0-9]+]]:_(s32) = COPY $a0
+ ; MIPS32: [[COPY1:%[0-9]+]]:_(s32) = COPY $a1
+ ; MIPS32: [[COPY2:%[0-9]+]]:_(s32) = COPY $a2
+ ; MIPS32: [[COPY3:%[0-9]+]]:_(s32) = COPY $a3
+ ; MIPS32: [[FRAME_INDEX:%[0-9]+]]:_(p0) = G_FRAME_INDEX %fixed-stack.0
+ ; MIPS32: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX]](p0) :: (load 4 from %fixed-stack.0, align 8)
+ ; MIPS32: [[FRAME_INDEX1:%[0-9]+]]:_(p0) = G_FRAME_INDEX %fixed-stack.1
+ ; MIPS32: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX1]](p0) :: (load 4 from %fixed-stack.1)
+ ; MIPS32: [[FRAME_INDEX2:%[0-9]+]]:_(p0) = G_FRAME_INDEX %fixed-stack.2
+ ; MIPS32: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX2]](p0) :: (load 4 from %fixed-stack.2, align 8)
+ ; MIPS32: [[FRAME_INDEX3:%[0-9]+]]:_(p0) = G_FRAME_INDEX %fixed-stack.3
+ ; MIPS32: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX3]](p0) :: (load 4 from %fixed-stack.3)
+ ; MIPS32: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[LOAD]], [[COPY]]
+ ; MIPS32: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[LOAD1]], [[COPY]]
+ ; MIPS32: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[LOAD]], [[COPY1]]
+ ; MIPS32: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[LOAD]], [[COPY]]
+ ; MIPS32: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[MUL1]], [[MUL2]]
+ ; MIPS32: [[ICMP:%[0-9]+]]:_(s32) = G_ICMP intpred(ult), [[ADD]](s32), [[MUL2]]
+ ; MIPS32: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+ ; MIPS32: [[COPY4:%[0-9]+]]:_(s32) = COPY [[ICMP]](s32)
+ ; MIPS32: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C]]
+ ; MIPS32: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[ADD]], [[UMULH]]
+ ; MIPS32: [[ICMP1:%[0-9]+]]:_(s32) = G_ICMP intpred(ult), [[ADD1]](s32), [[UMULH]]
+ ; MIPS32: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+ ; MIPS32: [[COPY5:%[0-9]+]]:_(s32) = COPY [[ICMP1]](s32)
+ ; MIPS32: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C1]]
+ ; MIPS32: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[AND]], [[AND1]]
+ ; MIPS32: [[MUL3:%[0-9]+]]:_(s32) = G_MUL [[LOAD2]], [[COPY]]
+ ; MIPS32: [[MUL4:%[0-9]+]]:_(s32) = G_MUL [[LOAD1]], [[COPY1]]
+ ; MIPS32: [[MUL5:%[0-9]+]]:_(s32) = G_MUL [[LOAD]], [[COPY2]]
+ ; MIPS32: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[LOAD1]], [[COPY]]
+ ; MIPS32: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[LOAD]], [[COPY1]]
+ ; MIPS32: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[MUL3]], [[MUL4]]
+ ; MIPS32: [[ICMP2:%[0-9]+]]:_(s32) = G_ICMP intpred(ult), [[ADD3]](s32), [[MUL4]]
+ ; MIPS32: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+ ; MIPS32: [[COPY6:%[0-9]+]]:_(s32) = COPY [[ICMP2]](s32)
+ ; MIPS32: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY6]], [[C2]]
+ ; MIPS32: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[ADD3]], [[MUL5]]
+ ; MIPS32: [[ICMP3:%[0-9]+]]:_(s32) = G_ICMP intpred(ult), [[ADD4]](s32), [[MUL5]]
+ ; MIPS32: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+ ; MIPS32: [[COPY7:%[0-9]+]]:_(s32) = COPY [[ICMP3]](s32)
+ ; MIPS32: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY7]], [[C3]]
+ ; MIPS32: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[AND2]], [[AND3]]
+ ; MIPS32: [[ADD6:%[0-9]+]]:_(s32) = G_ADD [[ADD4]], [[UMULH1]]
+ ; MIPS32: [[ICMP4:%[0-9]+]]:_(s32) = G_ICMP intpred(ult), [[ADD6]](s32), [[UMULH1]]
+ ; MIPS32: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+ ; MIPS32: [[COPY8:%[0-9]+]]:_(s32) = COPY [[ICMP4]](s32)
+ ; MIPS32: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY8]], [[C4]]
+ ; MIPS32: [[ADD7:%[0-9]+]]:_(s32) = G_ADD [[ADD5]], [[AND4]]
+ ; MIPS32: [[ADD8:%[0-9]+]]:_(s32) = G_ADD [[ADD6]], [[UMULH2]]
+ ; MIPS32: [[ICMP5:%[0-9]+]]:_(s32) = G_ICMP intpred(ult), [[ADD8]](s32), [[UMULH2]]
+ ; MIPS32: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+ ; MIPS32: [[COPY9:%[0-9]+]]:_(s32) = COPY [[ICMP5]](s32)
+ ; MIPS32: [[AND5:%[0-9]+]]:_(s32) = G_AND [[COPY9]], [[C5]]
+ ; MIPS32: [[ADD9:%[0-9]+]]:_(s32) = G_ADD [[ADD7]], [[AND5]]
+ ; MIPS32: [[ADD10:%[0-9]+]]:_(s32) = G_ADD [[ADD8]], [[ADD2]]
+ ; MIPS32: [[ICMP6:%[0-9]+]]:_(s32) = G_ICMP intpred(ult), [[ADD10]](s32), [[ADD2]]
+ ; MIPS32: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+ ; MIPS32: [[COPY10:%[0-9]+]]:_(s32) = COPY [[ICMP6]](s32)
+ ; MIPS32: [[AND6:%[0-9]+]]:_(s32) = G_AND [[COPY10]], [[C6]]
+ ; MIPS32: [[ADD11:%[0-9]+]]:_(s32) = G_ADD [[ADD9]], [[AND6]]
+ ; MIPS32: [[MUL6:%[0-9]+]]:_(s32) = G_MUL [[LOAD3]], [[COPY]]
+ ; MIPS32: [[MUL7:%[0-9]+]]:_(s32) = G_MUL [[LOAD2]], [[COPY1]]
+ ; MIPS32: [[MUL8:%[0-9]+]]:_(s32) = G_MUL [[LOAD1]], [[COPY2]]
+ ; MIPS32: [[MUL9:%[0-9]+]]:_(s32) = G_MUL [[LOAD]], [[COPY3]]
+ ; MIPS32: [[UMULH3:%[0-9]+]]:_(s32) = G_UMULH [[LOAD2]], [[COPY]]
+ ; MIPS32: [[UMULH4:%[0-9]+]]:_(s32) = G_UMULH [[LOAD1]], [[COPY1]]
+ ; MIPS32: [[UMULH5:%[0-9]+]]:_(s32) = G_UMULH [[LOAD]], [[COPY2]]
+ ; MIPS32: [[ADD12:%[0-9]+]]:_(s32) = G_ADD [[MUL6]], [[MUL7]]
+ ; MIPS32: [[ADD13:%[0-9]+]]:_(s32) = G_ADD [[ADD12]], [[MUL8]]
+ ; MIPS32: [[ADD14:%[0-9]+]]:_(s32) = G_ADD [[ADD13]], [[MUL9]]
+ ; MIPS32: [[ADD15:%[0-9]+]]:_(s32) = G_ADD [[ADD14]], [[UMULH3]]
+ ; MIPS32: [[ADD16:%[0-9]+]]:_(s32) = G_ADD [[ADD15]], [[UMULH4]]
+ ; MIPS32: [[ADD17:%[0-9]+]]:_(s32) = G_ADD [[ADD16]], [[UMULH5]]
+ ; MIPS32: [[ADD18:%[0-9]+]]:_(s32) = G_ADD [[ADD17]], [[ADD11]]
+ ; MIPS32: $v0 = COPY [[MUL]](s32)
+ ; MIPS32: $v1 = COPY [[ADD1]](s32)
+ ; MIPS32: $a0 = COPY [[ADD10]](s32)
+ ; MIPS32: $a1 = COPY [[ADD18]](s32)
+ ; MIPS32: RetRA implicit $v0, implicit $v1, implicit $a0, implicit $a1
+ %2:_(s32) = COPY $a0
+ %3:_(s32) = COPY $a1
+ %4:_(s32) = COPY $a2
+ %5:_(s32) = COPY $a3
+ %0:_(s128) = G_MERGE_VALUES %2(s32), %3(s32), %4(s32), %5(s32)
+ %10:_(p0) = G_FRAME_INDEX %fixed-stack.3
+ %6:_(s32) = G_LOAD %10(p0) :: (load 4 from %fixed-stack.3, align 8)
+ %11:_(p0) = G_FRAME_INDEX %fixed-stack.2
+ %7:_(s32) = G_LOAD %11(p0) :: (load 4 from %fixed-stack.2)
+ %12:_(p0) = G_FRAME_INDEX %fixed-stack.1
+ %8:_(s32) = G_LOAD %12(p0) :: (load 4 from %fixed-stack.1, align 8)
+ %13:_(p0) = G_FRAME_INDEX %fixed-stack.0
+ %9:_(s32) = G_LOAD %13(p0) :: (load 4 from %fixed-stack.0)
+ %1:_(s128) = G_MERGE_VALUES %6(s32), %7(s32), %8(s32), %9(s32)
+ %14:_(s128) = G_MUL %1, %0
+ %15:_(s32), %16:_(s32), %17:_(s32), %18:_(s32) = G_UNMERGE_VALUES %14(s128)
+ $v0 = COPY %15(s32)
+ $v1 = COPY %16(s32)
+ $a0 = COPY %17(s32)
+ $a1 = COPY %18(s32)
+ RetRA implicit $v0, implicit $v1, implicit $a0, implicit $a1
+
...
---
name: umul_with_overflow
ret i16 %mul
}
+define i64 @mul_i64(i64 %a, i64 %b) {
+; MIPS32-LABEL: mul_i64:
+; MIPS32: # %bb.0: # %entry
+; MIPS32-NEXT: mul $2, $6, $4
+; MIPS32-NEXT: mul $7, $7, $4
+; MIPS32-NEXT: mul $5, $6, $5
+; MIPS32-NEXT: multu $6, $4
+; MIPS32-NEXT: mfhi $4
+; MIPS32-NEXT: addu $5, $7, $5
+; MIPS32-NEXT: addu $3, $5, $4
+; MIPS32-NEXT: jr $ra
+; MIPS32-NEXT: nop
+entry:
+ %mul = mul i64 %b, %a
+ ret i64 %mul
+}
+
+define i128 @mul_i128(i128 %a, i128 %b) {
+; MIPS32-LABEL: mul_i128:
+; MIPS32: # %bb.0: # %entry
+; MIPS32-NEXT: addiu $1, $sp, 16
+; MIPS32-NEXT: lw $1, 0($1)
+; MIPS32-NEXT: addiu $2, $sp, 20
+; MIPS32-NEXT: lw $2, 0($2)
+; MIPS32-NEXT: addiu $3, $sp, 24
+; MIPS32-NEXT: lw $3, 0($3)
+; MIPS32-NEXT: addiu $8, $sp, 28
+; MIPS32-NEXT: lw $8, 0($8)
+; MIPS32-NEXT: mul $9, $1, $4
+; MIPS32-NEXT: mul $10, $2, $4
+; MIPS32-NEXT: mul $11, $1, $5
+; MIPS32-NEXT: multu $1, $4
+; MIPS32-NEXT: mfhi $12
+; MIPS32-NEXT: addu $10, $10, $11
+; MIPS32-NEXT: sltu $11, $10, $11
+; MIPS32-NEXT: lui $13, 0
+; MIPS32-NEXT: ori $13, $13, 1
+; MIPS32-NEXT: and $11, $11, $13
+; MIPS32-NEXT: addu $10, $10, $12
+; MIPS32-NEXT: sltu $12, $10, $12
+; MIPS32-NEXT: lui $13, 0
+; MIPS32-NEXT: ori $13, $13, 1
+; MIPS32-NEXT: and $12, $12, $13
+; MIPS32-NEXT: addu $11, $11, $12
+; MIPS32-NEXT: mul $12, $3, $4
+; MIPS32-NEXT: mul $13, $2, $5
+; MIPS32-NEXT: mul $14, $1, $6
+; MIPS32-NEXT: multu $2, $4
+; MIPS32-NEXT: mfhi $15
+; MIPS32-NEXT: multu $1, $5
+; MIPS32-NEXT: mfhi $24
+; MIPS32-NEXT: addu $12, $12, $13
+; MIPS32-NEXT: sltu $13, $12, $13
+; MIPS32-NEXT: lui $25, 0
+; MIPS32-NEXT: ori $25, $25, 1
+; MIPS32-NEXT: and $13, $13, $25
+; MIPS32-NEXT: addu $12, $12, $14
+; MIPS32-NEXT: sltu $14, $12, $14
+; MIPS32-NEXT: lui $25, 0
+; MIPS32-NEXT: ori $25, $25, 1
+; MIPS32-NEXT: and $14, $14, $25
+; MIPS32-NEXT: addu $13, $13, $14
+; MIPS32-NEXT: addu $12, $12, $15
+; MIPS32-NEXT: sltu $14, $12, $15
+; MIPS32-NEXT: lui $15, 0
+; MIPS32-NEXT: ori $15, $15, 1
+; MIPS32-NEXT: and $14, $14, $15
+; MIPS32-NEXT: addu $13, $13, $14
+; MIPS32-NEXT: addu $12, $12, $24
+; MIPS32-NEXT: sltu $14, $12, $24
+; MIPS32-NEXT: lui $15, 0
+; MIPS32-NEXT: ori $15, $15, 1
+; MIPS32-NEXT: and $14, $14, $15
+; MIPS32-NEXT: addu $13, $13, $14
+; MIPS32-NEXT: addu $12, $12, $11
+; MIPS32-NEXT: sltu $11, $12, $11
+; MIPS32-NEXT: lui $14, 0
+; MIPS32-NEXT: ori $14, $14, 1
+; MIPS32-NEXT: and $11, $11, $14
+; MIPS32-NEXT: addu $11, $13, $11
+; MIPS32-NEXT: mul $8, $8, $4
+; MIPS32-NEXT: mul $13, $3, $5
+; MIPS32-NEXT: mul $14, $2, $6
+; MIPS32-NEXT: mul $7, $1, $7
+; MIPS32-NEXT: multu $3, $4
+; MIPS32-NEXT: mfhi $3
+; MIPS32-NEXT: multu $2, $5
+; MIPS32-NEXT: mfhi $2
+; MIPS32-NEXT: multu $1, $6
+; MIPS32-NEXT: mfhi $1
+; MIPS32-NEXT: addu $4, $8, $13
+; MIPS32-NEXT: addu $4, $4, $14
+; MIPS32-NEXT: addu $4, $4, $7
+; MIPS32-NEXT: addu $3, $4, $3
+; MIPS32-NEXT: addu $2, $3, $2
+; MIPS32-NEXT: addu $1, $2, $1
+; MIPS32-NEXT: addu $5, $1, $11
+; MIPS32-NEXT: move $2, $9
+; MIPS32-NEXT: move $3, $10
+; MIPS32-NEXT: move $4, $12
+; MIPS32-NEXT: jr $ra
+; MIPS32-NEXT: nop
+entry:
+ %mul = mul i128 %b, %a
+ ret i128 %mul
+}
+
declare { i32, i1 } @llvm.umul.with.overflow.i32(i32, i32)
define void @umul_with_overflow(i32 %lhs, i32 %rhs, i32* %pmul, i1* %pcarry_flag) {
; MIPS32-LABEL: umul_with_overflow: