return;
}
}
+ case ARMISD::UMAAL: {
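+ // ARMISD::UMAAL takes four i32 operands: the two multiplicands followed
+ // by the two 32-bit values to accumulate, computing
+ // RdHi:RdLo = Rn * Rm + Lo + Hi.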
+ unsigned Opc = Subtarget->isThumb() ? ARM::t2UMAAL : ARM::UMAAL;
+ SDValue Ops[] = { N->getOperand(0), N->getOperand(1),
+ N->getOperand(2), N->getOperand(3),
+ getAL(CurDAG, dl),
+ CurDAG->getRegister(0, MVT::i32) };
+ ReplaceNode(N, CurDAG->getMachineNode(Opc, dl, MVT::i32, MVT::i32, Ops));
+ return;
+ }
case ARMISD::UMLAL:{
+ // UMAAL is similar to UMLAL but it adds two 32-bit values to the
+ // 64-bit multiplication result.
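+ // Here we try to fold an ADDC/ADDE pair whose only purpose is to widen
+ // a 32-bit addition to 64 bits, i.e. we match
+ //   UMLAL(Rn, Rm, ADDC(A, B), ADDE(0, 0, ADDC.carry))
+ // which computes Rn * Rm + A + B, exactly what UMAAL provides.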
+ if (Subtarget->hasV6Ops() && N->getOperand(2).getOpcode() == ARMISD::ADDC &&
+ N->getOperand(3).getOpcode() == ARMISD::ADDE) {
+
+ SDValue Addc = N->getOperand(2);
+ SDValue Adde = N->getOperand(3);
+
+ if (Adde.getOperand(2).getNode() == Addc.getNode()) {
+
+ ConstantSDNode *Op0 = dyn_cast<ConstantSDNode>(Adde.getOperand(0));
+ ConstantSDNode *Op1 = dyn_cast<ConstantSDNode>(Adde.getOperand(1));
+
+ if (Op0 && Op1 && Op0->getZExtValue() == 0 && Op1->getZExtValue() == 0) {
+ // Select UMAAL instead: UMAAL RdLo, RdHi, Rn, Rm
+ // RdLo = one operand to be added, lower 32 bits of the result
+ // RdHi = other operand to be added, upper 32 bits of the result
+ // Rn = first multiply operand
+ // Rm = second multiply operand
+ SDValue Ops[] = { N->getOperand(0), N->getOperand(1),
+ Addc.getOperand(0), Addc.getOperand(1),
+ getAL(CurDAG, dl),
+ CurDAG->getRegister(0, MVT::i32) };
+ unsigned Opc = Subtarget->isThumb() ? ARM::t2UMAAL : ARM::UMAAL;
+ CurDAG->SelectNodeTo(N, Opc, MVT::i32, MVT::i32, Ops);
+ return;
+ }
+ }
+ }
+
if (Subtarget->isThumb()) {
SDValue Ops[] = { N->getOperand(0), N->getOperand(1), N->getOperand(2),
N->getOperand(3), getAL(CurDAG, dl),
case ARMISD::VTBL2: return "ARMISD::VTBL2";
case ARMISD::VMULLs: return "ARMISD::VMULLs";
case ARMISD::VMULLu: return "ARMISD::VMULLu";
+ case ARMISD::UMAAL: return "ARMISD::UMAAL";
case ARMISD::UMLAL: return "ARMISD::UMLAL";
case ARMISD::SMLAL: return "ARMISD::SMLAL";
case ARMISD::BUILD_VECTOR: return "ARMISD::BUILD_VECTOR";
TargetLowering::DAGCombinerInfo &DCI,
const ARMSubtarget *Subtarget) {
- if (Subtarget->isThumb1Only()) return SDValue();
-
- // Only perform the checks after legalize when the pattern is available.
- if (DCI.isBeforeLegalize()) return SDValue();
-
// Look for multiply add opportunities.
// The pattern is an ISD::UMUL_LOHI followed by two add nodes, where
// each add node consumes a value from ISD::UMUL_LOHI and there is
return resNode;
}
+static SDValue AddCombineTo64bitUMAAL(SDNode *AddcNode,
+ TargetLowering::DAGCombinerInfo &DCI,
+ const ARMSubtarget *Subtarget) {
+ // UMAAL is similar to UMLAL except that it adds two unsigned 32-bit
+ // values to the 64-bit multiplication result instead of one 64-bit
+ // accumulator. While trying to combine into the other MLAL nodes, first
+ // search for a chance to use UMAAL: check whether AddcNode uses another
+ // ADDC node that can first be combined into a UMLAL. The other pattern,
+ // in which AddcNode itself is combined into a UMLAL that is then consumed
+ // by another ADDC, is handled in ISelDAGToDAG.
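+ //
+ // Schematically, the fold performed below is:
+ //   ADDC(UMLAL(Rn, Rm, Lo, 0).lo, AddHi)          <- this AddcNode
+ //   ADDE(0, UMLAL(Rn, Rm, Lo, 0).hi, ADDC.carry)  <- the glued ADDE
+ // ==> UMAAL(Rn, Rm, Lo, AddHi)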
+
+ if (!Subtarget->hasV6Ops())
+ return AddCombineTo64bitMLAL(AddcNode, DCI, Subtarget);
+
+ SDNode *PrevAddc = nullptr;
+ if (AddcNode->getOperand(0).getOpcode() == ISD::ADDC)
+ PrevAddc = AddcNode->getOperand(0).getNode();
+ else if (AddcNode->getOperand(1).getOpcode() == ISD::ADDC)
+ PrevAddc = AddcNode->getOperand(1).getNode();
+
+ // If there is no ADDC chain, just fall back to searching for any MLAL.
+ if (PrevAddc == nullptr)
+ return AddCombineTo64bitMLAL(AddcNode, DCI, Subtarget);
+
+ // Try to convert the ADDC operand into an MLAL; if that fails, try to
+ // combine AddcNode itself.
+ SDValue MLAL = AddCombineTo64bitMLAL(PrevAddc, DCI, Subtarget);
+ if (MLAL != SDValue(PrevAddc, 0))
+ return AddCombineTo64bitMLAL(AddcNode, DCI, Subtarget);
+
+ // Find the converted UMLAL or quit if it doesn't exist.
+ SDNode *UmlalNode = nullptr;
+ SDValue AddHi;
+ if (AddcNode->getOperand(0).getOpcode() == ARMISD::UMLAL) {
+ UmlalNode = AddcNode->getOperand(0).getNode();
+ AddHi = AddcNode->getOperand(1);
+ } else if (AddcNode->getOperand(1).getOpcode() == ARMISD::UMLAL) {
+ UmlalNode = AddcNode->getOperand(1).getNode();
+ AddHi = AddcNode->getOperand(0);
+ } else {
+ return SDValue();
+ }
+
+ // The UMLAL's high accumulator (operand 3) must be zero; otherwise
+ // folding another 32-bit addend into it would change the result.
+ auto *Zero = dyn_cast<ConstantSDNode>(UmlalNode->getOperand(3));
+ if (!Zero || Zero->getZExtValue() != 0)
+ return SDValue();
+
+ // Check that we have a glued ADDC node.
+ if (AddcNode->getValueType(1) != MVT::Glue)
+ return SDValue();
+
+ // Look for the glued ADDE.
+ SDNode *AddeNode = AddcNode->getGluedUser();
+ if (!AddeNode)
+ return SDValue();
+
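+ // The ADDE should use the same UMLAL value as the ADDC, together with the
+ // same zero constant.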
+ if ((AddeNode->getOperand(0).getNode() == Zero &&
+ AddeNode->getOperand(1).getNode() == UmlalNode) ||
+ (AddeNode->getOperand(0).getNode() == UmlalNode &&
+ AddeNode->getOperand(1).getNode() == Zero)) {
+
+ SelectionDAG &DAG = DCI.DAG;
+ SDValue Ops[] = { UmlalNode->getOperand(0), UmlalNode->getOperand(1),
+ UmlalNode->getOperand(2), AddHi };
+ SDValue UMAAL = DAG.getNode(ARMISD::UMAAL, SDLoc(AddcNode),
+ DAG.getVTList(MVT::i32, MVT::i32), Ops);
+
+ // Replace the ADD nodes' uses with the UMAAL node's values: the ADDC
+ // produced the low half and the glued ADDE the high half.
+ DAG.ReplaceAllUsesOfValueWith(SDValue(AddeNode, 0),
+ SDValue(UMAAL.getNode(), 1));
+ DAG.ReplaceAllUsesOfValueWith(SDValue(AddcNode, 0),
+ SDValue(UMAAL.getNode(), 0));
+
+ // Return the original node to notify the combiner that the nodes have
+ // already been replaced.
+ return SDValue(AddcNode, 0);
+ }
+ return SDValue();
+}
+
/// PerformADDCCombine - Target-specific dag combine transform from
-/// ISD::ADDC, ISD::ADDE, and ISD::MUL_LOHI to MLAL.
+/// ISD::ADDC, ISD::ADDE, and ISD::MUL_LOHI to MLAL, or from
+/// ISD::ADDC, ISD::ADDE and ARMISD::UMLAL to ARMISD::UMAAL.
static SDValue PerformADDCCombine(SDNode *N,
TargetLowering::DAGCombinerInfo &DCI,
const ARMSubtarget *Subtarget) {
- return AddCombineTo64bitMLAL(N, DCI, Subtarget);
+ if (Subtarget->isThumb1Only()) return SDValue();
+
+ // Only perform the checks after legalize when the pattern is available.
+ if (DCI.isBeforeLegalize()) return SDValue();
+ return AddCombineTo64bitUMAAL(N, DCI, Subtarget);
}
/// PerformADDCombineWithOperands - Try DAG combinations for an ADD with
UMLAL, // 64bit Unsigned Accumulate Multiply
SMLAL, // 64bit Signed Accumulate Multiply
+ UMAAL, // 64bit Unsigned Accumulate Accumulate Multiply
// Operands of the standard BUILD_VECTOR node are not legalized, which
// is fine if BUILD_VECTORs are always lowered to shuffles or other
SDTCisVT<4, i32>, SDTCisVT<5, i32> ] >;
def ARMUmlal : SDNode<"ARMISD::UMLAL", SDT_ARM64bitmlal>;
def ARMSmlal : SDNode<"ARMISD::SMLAL", SDT_ARM64bitmlal>;
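+// UMAAL produces two i32 results from four i32 operands, just like UMLAL
+// and SMLAL, so the existing 64-bit MLAL node profile fits it as well.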
+def ARMUmaal : SDNode<"ARMISD::UMAAL", SDT_ARM64bitmlal>;
// Node definitions.
def ARMWrapper : SDNode<"ARMISD::Wrapper", SDTIntUnaryOp>;
RegConstraint<"$RLo = $RdLo, $RHi = $RdHi">, Requires<[IsARM, HasV6]>;
def UMAAL : AMul1I <0b0000010, (outs GPR:$RdLo, GPR:$RdHi),
- (ins GPR:$Rn, GPR:$Rm), IIC_iMAC64,
+ (ins GPR:$Rn, GPR:$Rm, GPR:$RLo, GPR:$RHi),
+ IIC_iMAC64,
"umaal", "\t$RdLo, $RdHi, $Rn, $Rm", []>,
- Requires<[IsARM, HasV6]> {
+ RegConstraint<"$RLo = $RdLo, $RHi = $RdHi">, Requires<[IsARM, HasV6]> {
bits<4> RdLo;
bits<4> RdHi;
bits<4> Rm;
def t2UMAAL : T2MulLong<0b110, 0b0110,
(outs rGPR:$RdLo, rGPR:$RdHi),
- (ins rGPR:$Rn, rGPR:$Rm), IIC_iMAC64,
+ (ins rGPR:$Rn, rGPR:$Rm, rGPR:$RLo, rGPR:$RHi), IIC_iMAC64,
"umaal", "\t$RdLo, $RdHi, $Rn, $Rm", []>,
+ RegConstraint<"$RLo = $RdLo, $RHi = $RdHi">,
Requires<[IsThumb2, HasDSP]>;
} // hasSideEffects
ret i64 %add
}
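+; UMAAL pattern: the two extra 32-bit operands are added to the 64-bit
+; product one after the other.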
+define i64 @MACLongTest9(i32 %lhs, i32 %rhs, i32 %lo, i32 %hi) {
+;CHECK-LABEL: MACLongTest9:
+;CHECK-V7-LE:umaal
+;CHECK-V7-BE:umaal
+;CHECK-NOT:umaal
+ %conv = zext i32 %lhs to i64
+ %conv1 = zext i32 %rhs to i64
+ %mul = mul nuw i64 %conv1, %conv
+ %conv2 = zext i32 %lo to i64
+ %add = add i64 %mul, %conv2
+ %conv3 = zext i32 %hi to i64
+ %add2 = add i64 %add, %conv3
+ ret i64 %add2
+}
+
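+; UMAAL pattern: the two extra 32-bit operands are summed first and the
+; result is then added to the 64-bit product.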
+define i64 @MACLongTest10(i32 %lhs, i32 %rhs, i32 %lo, i32 %hi) {
+;CHECK-LABEL: MACLongTest10:
+;CHECK-V7-LE:umaal
+;CHECK-V7-BE:umaal
+;CHECK-NOT:umaal
+ %conv = zext i32 %lhs to i64
+ %conv1 = zext i32 %rhs to i64
+ %mul = mul nuw i64 %conv1, %conv
+ %conv2 = zext i32 %lo to i64
+ %conv3 = zext i32 %hi to i64
+ %add = add i64 %conv2, %conv3
+ %add2 = add i64 %add, %mul
+ ret i64 %add2
+}