return;
}
}
+ case ARMISD::UMAAL: {
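+ // ARMISD::UMAAL takes four i32 operands: the two multiplicands followed
+ // by the two 32-bit values to accumulate, computing
+ // RdHi:RdLo = Rn * Rm + Lo + Hi.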
+ unsigned Opc = Subtarget->isThumb() ? ARM::t2UMAAL : ARM::UMAAL;
+ SDValue Ops[] = { N->getOperand(0), N->getOperand(1),
+ N->getOperand(2), N->getOperand(3),
+ getAL(CurDAG, dl),
+ CurDAG->getRegister(0, MVT::i32) };
+ ReplaceNode(N, CurDAG->getMachineNode(Opc, dl, MVT::i32, MVT::i32, Ops));
+ return;
+ }
case ARMISD::UMLAL:{
+ // UMAAL is similar to UMLAL but it adds two 32-bit values to the
+ // 64-bit multiplication result.
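+ // Here we try to fold an ADDC/ADDE pair whose only purpose is to widen
+ // a 32-bit addition to 64 bits, i.e. we match
+ //   UMLAL(Rn, Rm, ADDC(A, B), ADDE(0, 0, ADDC.carry))
+ // which computes Rn * Rm + A + B, exactly what UMAAL provides.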
+ if (Subtarget->hasV6Ops() && N->getOperand(2).getOpcode() == ARMISD::ADDC &&
+ N->getOperand(3).getOpcode() == ARMISD::ADDE) {
+
+ SDValue Addc = N->getOperand(2);
+ SDValue Adde = N->getOperand(3);
+
+ if (Adde.getOperand(2).getNode() == Addc.getNode()) {
+
+ ConstantSDNode *Op0 = dyn_cast<ConstantSDNode>(Adde.getOperand(0));
+ ConstantSDNode *Op1 = dyn_cast<ConstantSDNode>(Adde.getOperand(1));
+
+ if (Op0 && Op1 && Op0->getZExtValue() == 0 && Op1->getZExtValue() == 0) {
+ // Select UMAAL instead: UMAAL RdLo, RdHi, Rn, Rm
+ // RdLo = one operand to be added, lower 32 bits of the result
+ // RdHi = other operand to be added, upper 32 bits of the result
+ // Rn = first multiply operand
+ // Rm = second multiply operand
+ SDValue Ops[] = { N->getOperand(0), N->getOperand(1),
+ Addc.getOperand(0), Addc.getOperand(1),
+ getAL(CurDAG, dl),
+ CurDAG->getRegister(0, MVT::i32) };
+ unsigned Opc = Subtarget->isThumb() ? ARM::t2UMAAL : ARM::UMAAL;
+ CurDAG->SelectNodeTo(N, Opc, MVT::i32, MVT::i32, Ops);
+ return;
+ }
+ }
+ }
+
if (Subtarget->isThumb()) {
SDValue Ops[] = { N->getOperand(0), N->getOperand(1), N->getOperand(2),
N->getOperand(3), getAL(CurDAG, dl),
case ARMISD::VTBL2: return "ARMISD::VTBL2";
case ARMISD::VMULLs: return "ARMISD::VMULLs";
case ARMISD::VMULLu: return "ARMISD::VMULLu";
+ case ARMISD::UMAAL: return "ARMISD::UMAAL";
case ARMISD::UMLAL: return "ARMISD::UMLAL";
case ARMISD::SMLAL: return "ARMISD::SMLAL";
case ARMISD::BUILD_VECTOR: return "ARMISD::BUILD_VECTOR";
TargetLowering::DAGCombinerInfo &DCI,
const ARMSubtarget *Subtarget) {
- if (Subtarget->isThumb1Only()) return SDValue();
-
- // Only perform the checks after legalize when the pattern is available.
- if (DCI.isBeforeLegalize()) return SDValue();
-
// Look for multiply add opportunities.
// The pattern is an ISD::UMUL_LOHI followed by two add nodes, where
// each add node consumes a value from ISD::UMUL_LOHI and there is
return resNode;
}
+static SDValue AddCombineTo64bitUMAAL(SDNode *AddcNode,
+ TargetLowering::DAGCombinerInfo &DCI,
+ const ARMSubtarget *Subtarget) {
+ // UMAAL is similar to UMLAL except that it adds two unsigned 32-bit
+ // values to the 64-bit multiplication result instead of one 64-bit
+ // accumulator. While trying to combine into the other MLAL nodes, first
+ // search for a chance to use UMAAL: check whether AddcNode uses another
+ // ADDC node that can first be combined into a UMLAL. The other pattern,
+ // in which AddcNode itself is combined into a UMLAL that is then consumed
+ // by another ADDC, is handled in ISelDAGToDAG.
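+ //
+ // Schematically, the fold performed below is:
+ //   ADDC(UMLAL(Rn, Rm, Lo, 0).lo, AddHi)          <- this AddcNode
+ //   ADDE(0, UMLAL(Rn, Rm, Lo, 0).hi, ADDC.carry)  <- the glued ADDE
+ // ==> UMAAL(Rn, Rm, Lo, AddHi)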
+
+ if (!Subtarget->hasV6Ops())
+ return AddCombineTo64bitMLAL(AddcNode, DCI, Subtarget);
+
+ SDNode *PrevAddc = nullptr;
+ if (AddcNode->getOperand(0).getOpcode() == ISD::ADDC)
+ PrevAddc = AddcNode->getOperand(0).getNode();
+ else if (AddcNode->getOperand(1).getOpcode() == ISD::ADDC)
+ PrevAddc = AddcNode->getOperand(1).getNode();
+
+ // If there is no ADDC chain, just fall back to searching for any MLAL.
+ if (PrevAddc == nullptr)
+ return AddCombineTo64bitMLAL(AddcNode, DCI, Subtarget);
+
+ // Try to convert the ADDC operand into an MLAL; if that fails, try to
+ // combine AddcNode itself.
+ SDValue MLAL = AddCombineTo64bitMLAL(PrevAddc, DCI, Subtarget);
+ if (MLAL != SDValue(PrevAddc, 0))
+ return AddCombineTo64bitMLAL(AddcNode, DCI, Subtarget);
+
+ // Find the converted UMLAL or quit if it doesn't exist.
+ SDNode *UmlalNode = nullptr;
+ SDValue AddHi;
+ if (AddcNode->getOperand(0).getOpcode() == ARMISD::UMLAL) {
+ UmlalNode = AddcNode->getOperand(0).getNode();
+ AddHi = AddcNode->getOperand(1);
+ } else if (AddcNode->getOperand(1).getOpcode() == ARMISD::UMLAL) {
+ UmlalNode = AddcNode->getOperand(1).getNode();
+ AddHi = AddcNode->getOperand(0);
+ } else {
+ return SDValue();
+ }
+
+ // The UMLAL's high accumulator (operand 3) must be zero; otherwise
+ // folding another 32-bit addend into it would change the result.
+ auto *Zero = dyn_cast<ConstantSDNode>(UmlalNode->getOperand(3));
+ if (!Zero || Zero->getZExtValue() != 0)
+ return SDValue();
+
+ // Check that we have a glued ADDC node.
+ if (AddcNode->getValueType(1) != MVT::Glue)
+ return SDValue();
+
+ // Look for the glued ADDE.
+ SDNode *AddeNode = AddcNode->getGluedUser();
+ if (!AddeNode)
+ return SDValue();
+
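+ // The ADDE should use the same UMLAL value as the ADDC, together with the
+ // same zero constant.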
+ if ((AddeNode->getOperand(0).getNode() == Zero &&
+ AddeNode->getOperand(1).getNode() == UmlalNode) ||
+ (AddeNode->getOperand(0).getNode() == UmlalNode &&
+ AddeNode->getOperand(1).getNode() == Zero)) {
+
+ SelectionDAG &DAG = DCI.DAG;
+ SDValue Ops[] = { UmlalNode->getOperand(0), UmlalNode->getOperand(1),
+ UmlalNode->getOperand(2), AddHi };
+ SDValue UMAAL = DAG.getNode(ARMISD::UMAAL, SDLoc(AddcNode),
+ DAG.getVTList(MVT::i32, MVT::i32), Ops);
+
+ // Replace the ADD nodes' uses with the UMAAL node's values: the ADDC
+ // produced the low half and the glued ADDE the high half.
+ DAG.ReplaceAllUsesOfValueWith(SDValue(AddeNode, 0),
+ SDValue(UMAAL.getNode(), 1));
+ DAG.ReplaceAllUsesOfValueWith(SDValue(AddcNode, 0),
+ SDValue(UMAAL.getNode(), 0));
+
+ // Return the original node to notify the combiner that the nodes have
+ // already been replaced.
+ return SDValue(AddcNode, 0);
+ }
+ return SDValue();
+}
+
/// PerformADDCCombine - Target-specific dag combine transform from
-/// ISD::ADDC, ISD::ADDE, and ISD::MUL_LOHI to MLAL.
+/// ISD::ADDC, ISD::ADDE, and ISD::MUL_LOHI to MLAL, or from
+/// ISD::ADDC, ISD::ADDE and ARMISD::UMLAL to ARMISD::UMAAL.
static SDValue PerformADDCCombine(SDNode *N,
TargetLowering::DAGCombinerInfo &DCI,
const ARMSubtarget *Subtarget) {
- return AddCombineTo64bitMLAL(N, DCI, Subtarget);
+ if (Subtarget->isThumb1Only()) return SDValue();
+
+ // Only perform the checks after legalize when the pattern is available.
+ if (DCI.isBeforeLegalize()) return SDValue();
+ return AddCombineTo64bitUMAAL(N, DCI, Subtarget);
}
/// PerformADDCombineWithOperands - Try DAG combinations for an ADD with
UMLAL, // 64bit Unsigned Accumulate Multiply
SMLAL, // 64bit Signed Accumulate Multiply
+ UMAAL, // 64bit Unsigned Accumulate Accumulate Multiply
// Operands of the standard BUILD_VECTOR node are not legalized, which
// is fine if BUILD_VECTORs are always lowered to shuffles or other
SDTCisVT<4, i32>, SDTCisVT<5, i32> ] >;
def ARMUmlal : SDNode<"ARMISD::UMLAL", SDT_ARM64bitmlal>;
def ARMSmlal : SDNode<"ARMISD::SMLAL", SDT_ARM64bitmlal>;
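+// UMAAL produces two i32 results from four i32 operands, just like UMLAL
+// and SMLAL, so the existing 64-bit MLAL node profile fits it as well.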
+def ARMUmaal : SDNode<"ARMISD::UMAAL", SDT_ARM64bitmlal>;
// Node definitions.
def ARMWrapper : SDNode<"ARMISD::Wrapper", SDTIntUnaryOp>;
RegConstraint<"$RLo = $RdLo, $RHi = $RdHi">, Requires<[IsARM, HasV6]>;
def UMAAL : AMul1I <0b0000010, (outs GPR:$RdLo, GPR:$RdHi),
- (ins GPR:$Rn, GPR:$Rm), IIC_iMAC64,
+ (ins GPR:$Rn, GPR:$Rm, GPR:$RLo, GPR:$RHi),
+ IIC_iMAC64,
"umaal", "\t$RdLo, $RdHi, $Rn, $Rm", []>,
- Requires<[IsARM, HasV6]> {
+ RegConstraint<"$RLo = $RdLo, $RHi = $RdHi">, Requires<[IsARM, HasV6]> {
bits<4> RdLo;
bits<4> RdHi;
bits<4> Rm;
def t2UMAAL : T2MulLong<0b110, 0b0110,
(outs rGPR:$RdLo, rGPR:$RdHi),
- (ins rGPR:$Rn, rGPR:$Rm), IIC_iMAC64,
+ (ins rGPR:$Rn, rGPR:$Rm, rGPR:$RLo, rGPR:$RHi), IIC_iMAC64,
"umaal", "\t$RdLo, $RdHi, $Rn, $Rm", []>,
+ RegConstraint<"$RLo = $RdLo, $RHi = $RdHi">,
Requires<[IsThumb2, HasDSP]>;
} // hasSideEffects
ret i64 %add
}
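+; UMAAL pattern: the two extra 32-bit operands are added to the 64-bit
+; product one after the other.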
+define i64 @MACLongTest9(i32 %lhs, i32 %rhs, i32 %lo, i32 %hi) {
+;CHECK-LABEL: MACLongTest9:
+;CHECK-V7-LE:umaal
+;CHECK-V7-BE:umaal
+;CHECK-NOT:umaal
+ %conv = zext i32 %lhs to i64
+ %conv1 = zext i32 %rhs to i64
+ %mul = mul nuw i64 %conv1, %conv
+ %conv2 = zext i32 %lo to i64
+ %add = add i64 %mul, %conv2
+ %conv3 = zext i32 %hi to i64
+ %add2 = add i64 %add, %conv3
+ ret i64 %add2
+}
+
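+; UMAAL pattern: the two extra 32-bit operands are summed first and the
+; result is then added to the 64-bit product.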
+define i64 @MACLongTest10(i32 %lhs, i32 %rhs, i32 %lo, i32 %hi) {
+;CHECK-LABEL: MACLongTest10:
+;CHECK-V7-LE:umaal
+;CHECK-V7-BE:umaal
+;CHECK-NOT:umaal
+ %conv = zext i32 %lhs to i64
+ %conv1 = zext i32 %rhs to i64
+ %mul = mul nuw i64 %conv1, %conv
+ %conv2 = zext i32 %lo to i64
+ %conv3 = zext i32 %hi to i64
+ %add = add i64 %conv2, %conv3
+ %add2 = add i64 %add, %mul
+ ret i64 %add2
+}