[ARM] Move SMULW[B|T] isel to DAG Combine

author Sam Parker <sam.parker@arm.com>

Tue, 14 Mar 2017 09:13:22 +0000 (09:13 +0000)

committer Sam Parker <sam.parker@arm.com>

Tue, 14 Mar 2017 09:13:22 +0000 (09:13 +0000)
author Sam Parker <sam.parker@arm.com>
Tue, 14 Mar 2017 09:13:22 +0000 (09:13 +0000)
committer Sam Parker <sam.parker@arm.com>
Tue, 14 Mar 2017 09:13:22 +0000 (09:13 +0000)
diff --git a/include/llvm/Target/TargetLowering.h b/include/llvm/Target/TargetLowering.h

index 736bf1e1e179a262c736872eb54116d53c2b13c9..b786c88a286f0622471d7df5e924ed96f43187c8 100644 (file)
--- a/include/llvm/Target/TargetLowering.h
+++ b/include/llvm/Target/TargetLowering.h
@@ -2379,6 +2379,10 @@ public:
                              unsigned Depth = 0,
                              bool AssumeSingleUse = false) const;
  
+  /// Helper wrapper around SimplifyDemandedBits that commits changes via DCI.
+  bool SimplifyDemandedBits(SDValue Op, APInt &DemandedMask,
+                            DAGCombinerInfo &DCI) const;
+
    /// Determine which of the bits specified in Mask are known to be either zero
    /// or one and return them in the KnownZero/KnownOne bitsets.
    virtual void computeKnownBitsForTargetNode(const SDValue Op,
diff --git a/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/lib/CodeGen/SelectionDAG/TargetLowering.cpp

index b1bfd47eb02debb37209e8d119fe89c6421f4776..549e3def4ee27a39cb5104d072030b2223e6def6 100644 (file)
--- a/lib/CodeGen/SelectionDAG/TargetLowering.cpp
+++ b/lib/CodeGen/SelectionDAG/TargetLowering.cpp
@@ -471,6 +471,21 @@ TargetLowering::TargetLoweringOpt::SimplifyDemandedBits(SDNode *User,
    return true;
  }
  
+bool TargetLowering::SimplifyDemandedBits(SDValue Op, APInt &DemandedMask,
+                                          DAGCombinerInfo &DCI) const {
+  // Simplify Op for DemandedMask and commit any resulting change via DCI.
+  SelectionDAG &DAG = DCI.DAG;
+  TargetLoweringOpt TLO(DAG, !DCI.isBeforeLegalize(),
+                        !DCI.isBeforeLegalizeOps());
+  APInt KnownZero, KnownOne; // Passed to the call below; not read afterwards.
+
+  bool Simplified = SimplifyDemandedBits(Op, DemandedMask, KnownZero, KnownOne,
+                                         TLO);
+  if (Simplified)
+    DCI.CommitTargetLoweringOpt(TLO); // Only commit when something changed.
+  return Simplified;
+}
+
  /// Look at Op. At this point, we know that only the DemandedMask bits of the
  /// result of Op are ever used downstream. If we can use this information to
  /// simplify Op, create a new simplified DAG node and return true, returning the
diff --git a/lib/Target/ARM/ARMISelDAGToDAG.cpp b/lib/Target/ARM/ARMISelDAGToDAG.cpp

index d4bc4767e6a771b792bd6128ca2ca3714b218f14..96240dd7e421dd6e8e99505b1b4989ec13334a6b 100644 (file)
--- a/lib/Target/ARM/ARMISelDAGToDAG.cpp
+++ b/lib/Target/ARM/ARMISelDAGToDAG.cpp
@@ -247,8 +247,6 @@ private:
    void SelectConcatVector(SDNode *N);
    void SelectCMPZ(SDNode *N, bool &SwitchEQNEToPLMI);
  
-  bool trySMLAWSMULW(SDNode *N);
-
    void SelectCMP_SWAP(SDNode *N);
  
    /// SelectInlineAsmMemoryOperand - Implement addressing mode selection for
@@ -2559,141 +2557,6 @@ bool ARMDAGToDAGISel::tryABSOp(SDNode *N){
    return false;
  }
  
-static bool SearchSignedMulShort(SDValue SignExt, unsigned *Opc, SDValue &Src1,
-                                 bool Accumulate) {
-  // For SM*WB, we need to some form of sext.
-  // For SM*WT, we need to search for (sra X, 16)
-  // Src1 then gets set to X.
-  if ((SignExt.getOpcode() == ISD::SIGN_EXTEND ||
-       SignExt.getOpcode() == ISD::SIGN_EXTEND_INREG ||
-       SignExt.getOpcode() == ISD::AssertSext) &&
-       SignExt.getValueType() == MVT::i32) {
-
-    *Opc = Accumulate ? ARM::SMLAWB : ARM::SMULWB;
-    Src1 = SignExt.getOperand(0);
-    return true;
-  }
-
-  if (SignExt.getOpcode() != ISD::SRA)
-    return false;
-
-  ConstantSDNode *SRASrc1 = dyn_cast<ConstantSDNode>(SignExt.getOperand(1));
-  if (!SRASrc1 || SRASrc1->getZExtValue() != 16)
-    return false;
-
-  SDValue Op0 = SignExt.getOperand(0);
-
-  // The sign extend operand for SM*WB could be generated by a shl and ashr.
-  if (Op0.getOpcode() == ISD::SHL) {
-    SDValue SHL = Op0;
-    ConstantSDNode *SHLSrc1 = dyn_cast<ConstantSDNode>(SHL.getOperand(1));
-    if (!SHLSrc1 || SHLSrc1->getZExtValue() != 16)
-      return false;
-
-    *Opc = Accumulate ? ARM::SMLAWB : ARM::SMULWB;
-    Src1 = Op0.getOperand(0);
-    return true;
-  }
-  *Opc = Accumulate ? ARM::SMLAWT : ARM::SMULWT;
-  Src1 = SignExt.getOperand(0);
-  return true;
-}
-
-static bool SearchSignedMulLong(SDValue OR, unsigned *Opc, SDValue &Src0,
-                                SDValue &Src1, bool Accumulate) {
-  // First we look for:
-  // (add (or (srl ?, 16), (shl ?, 16)))
-  if (OR.getOpcode() != ISD::OR)
-    return false;
-
-  SDValue SRL = OR.getOperand(0);
-  SDValue SHL = OR.getOperand(1);
-
-  if (SRL.getOpcode() != ISD::SRL || SHL.getOpcode() != ISD::SHL) {
-    SRL = OR.getOperand(1);
-    SHL = OR.getOperand(0);
-    if (SRL.getOpcode() != ISD::SRL || SHL.getOpcode() != ISD::SHL)
-      return false;
-  }
-
-  ConstantSDNode *SRLSrc1 = dyn_cast<ConstantSDNode>(SRL.getOperand(1));
-  ConstantSDNode *SHLSrc1 = dyn_cast<ConstantSDNode>(SHL.getOperand(1));
-  if (!SRLSrc1 || !SHLSrc1 || SRLSrc1->getZExtValue() != 16 ||
-      SHLSrc1->getZExtValue() != 16)
-    return false;
-
-  // The first operands to the shifts need to be the two results from the
-  // same smul_lohi node.
-  if ((SRL.getOperand(0).getNode() != SHL.getOperand(0).getNode()) ||
-       SRL.getOperand(0).getOpcode() != ISD::SMUL_LOHI)
-    return false;
-
-  SDNode *SMULLOHI = SRL.getOperand(0).getNode();
-  if (SRL.getOperand(0) != SDValue(SMULLOHI, 0) ||
-      SHL.getOperand(0) != SDValue(SMULLOHI, 1))
-    return false;
-
-  // Now we have:
-  // (add (or (srl (smul_lohi ?, ?), 16), (shl (smul_lohi ?, ?), 16)))
-  // For SMLAW[B|T] smul_lohi will take a 32-bit and a 16-bit arguments.
-  // For SMLAWB the 16-bit value will signed extended somehow.
-  // For SMLAWT only the SRA is required.
-
-  // Check both sides of SMUL_LOHI
-  if (SearchSignedMulShort(SMULLOHI->getOperand(0), Opc, Src1, Accumulate)) {
-    Src0 = SMULLOHI->getOperand(1);
-  } else if (SearchSignedMulShort(SMULLOHI->getOperand(1), Opc, Src1,
-                                  Accumulate)) {
-    Src0 = SMULLOHI->getOperand(0);
-  } else {
-    return false;
-  }
-  return true;
-}
-
-bool ARMDAGToDAGISel::trySMLAWSMULW(SDNode *N) {
-  if (!Subtarget->hasV6Ops() ||
-      (Subtarget->isThumb() && !Subtarget->hasThumb2()))
-    return false;
-
-  SDLoc dl(N);
-  SDValue Src0 = N->getOperand(0);
-  SDValue Src1 = N->getOperand(1);
-  SDValue A, B;
-  unsigned Opc = 0;
-
-  if (N->getOpcode() == ISD::ADD) {
-    if (Src0.getOpcode() != ISD::OR && Src1.getOpcode() != ISD::OR)
-      return false;
-
-    SDValue Acc;
-    if (SearchSignedMulLong(Src0, &Opc, A, B, true)) {
-      Acc = Src1;
-    } else if (SearchSignedMulLong(Src1, &Opc, A, B, true)) {
-      Acc = Src0;
-    } else {
-      return false;
-    }
-    if (Opc == 0)
-      return false;
-
-    SDValue Ops[] = { A, B, Acc, getAL(CurDAG, dl),
-                      CurDAG->getRegister(0, MVT::i32) };
-    CurDAG->SelectNodeTo(N, Opc, MVT::i32, MVT::Other, Ops);
-    return true;
-  } else if (N->getOpcode() == ISD::OR &&
-             SearchSignedMulLong(SDValue(N, 0), &Opc, A, B, false)) {
-    if (Opc == 0)
-      return false;
-
-    SDValue Ops[] = { A, B, getAL(CurDAG, dl),
-                      CurDAG->getRegister(0, MVT::i32)};
-    CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops);
-    return true;
-  }
-  return false;
-}
-
  /// We've got special pseudo-instructions for these
  void ARMDAGToDAGISel::SelectCMP_SWAP(SDNode *N) {
    unsigned Opcode;
@@ -2822,11 +2685,6 @@ void ARMDAGToDAGISel::Select(SDNode *N) {
  
    switch (N->getOpcode()) {
    default: break;
-  case ISD::ADD:
-  case ISD::OR:
-    if (trySMLAWSMULW(N))
-      return;
-    break;
    case ISD::WRITE_REGISTER:
      if (tryWriteRegister(N))
        return;
diff --git a/lib/Target/ARM/ARMISelLowering.cpp b/lib/Target/ARM/ARMISelLowering.cpp

index ad95d988e9a654926de6661059bede0aa3b9d3bc..62953f4be18bdeac937b51b922066e9a00f4eeea 100644 (file)
--- a/lib/Target/ARM/ARMISelLowering.cpp
+++ b/lib/Target/ARM/ARMISelLowering.cpp
@@ -1337,6 +1337,8 @@ const char *ARMTargetLowering::getTargetNodeName(unsigned Opcode) const {
    case ARMISD::UMAAL:         return "ARMISD::UMAAL";
    case ARMISD::UMLAL:         return "ARMISD::UMLAL";
    case ARMISD::SMLAL:         return "ARMISD::SMLAL";
+  case ARMISD::SMULWB:        return "ARMISD::SMULWB";
+  case ARMISD::SMULWT:        return "ARMISD::SMULWT";
    case ARMISD::BUILD_VECTOR:  return "ARMISD::BUILD_VECTOR";
    case ARMISD::BFI:           return "ARMISD::BFI";
    case ARMISD::VORRIMM:       return "ARMISD::VORRIMM";
@@ -1446,6 +1448,40 @@ Sched::Preference ARMTargetLowering::getSchedulingPreference(SDNode *N) const {
  // Lowering Code
  //===----------------------------------------------------------------------===//
  
+static bool isSRL16(const SDValue &Op) { // True iff Op is (srl X, 16).
+  if (Op.getOpcode() != ISD::SRL)
+    return false;
+  if (auto Const = dyn_cast<ConstantSDNode>(Op.getOperand(1)))
+    return Const->getZExtValue() == 16;
+  return false; // The shift amount is not a ConstantSDNode.
+}
+
+static bool isSRA16(const SDValue &Op) { // True iff Op is (sra X, 16).
+  if (Op.getOpcode() != ISD::SRA)
+    return false;
+  if (auto Const = dyn_cast<ConstantSDNode>(Op.getOperand(1)))
+    return Const->getZExtValue() == 16;
+  return false; // The shift amount is not a ConstantSDNode.
+}
+
+static bool isSHL16(const SDValue &Op) { // True iff Op is (shl X, 16).
+  if (Op.getOpcode() != ISD::SHL)
+    return false;
+  if (auto Const = dyn_cast<ConstantSDNode>(Op.getOperand(1)))
+    return Const->getZExtValue() == 16;
+  return false; // The shift amount is not a ConstantSDNode.
+}
+
+// Check for a signed 16-bit value. We special case SRA because it makes
+// things simpler when also looking for SRAs that aren't sign extending a
+// smaller value. Without the check, we'd need to take extra care with the
+// checking order for some operations.
+static bool isS16(const SDValue &Op, SelectionDAG &DAG) {
+  if (isSRA16(Op))
+    return isSHL16(Op.getOperand(0)); // Only (sra (shl X, 16), 16) counts.
+  return DAG.ComputeNumSignBits(Op) == 17; // Exactly 17 sign bits, no more.
+}
+
  /// IntCCToARMCC - Convert a DAG integer condition code to an ARM CC
  static ARMCC::CondCodes IntCCToARMCC(ISD::CondCode CC) {
    switch (CC) {
@@ -9945,6 +9981,67 @@ static SDValue PerformANDCombine(SDNode *N,
    return SDValue();
  }
  
+// Try to combine an OR of shifted SMUL_LOHI results into SMULWB or SMULWT.
+static SDValue PerformORCombineToSMULWBT(SDNode *OR,
+                                         TargetLowering::DAGCombinerInfo &DCI,
+                                         const ARMSubtarget *Subtarget) {
+  if (!Subtarget->hasV6Ops() ||
+      (Subtarget->isThumb() &&
+       (!Subtarget->hasThumb2() || !Subtarget->hasDSP())))
+    return SDValue();
+
+  SDValue SRL = OR->getOperand(0);
+  SDValue SHL = OR->getOperand(1);
+
+  if (SRL.getOpcode() != ISD::SRL || SHL.getOpcode() != ISD::SHL) {
+    SRL = OR->getOperand(1); // Try the operands the other way round.
+    SHL = OR->getOperand(0);
+  }
+  if (!isSRL16(SRL) || !isSHL16(SHL))
+    return SDValue();
+
+  // The first operands of the shifts need to be the two results from the
+  // same smul_lohi node.
+  if ((SRL.getOperand(0).getNode() != SHL.getOperand(0).getNode()) ||
+       SRL.getOperand(0).getOpcode() != ISD::SMUL_LOHI)
+    return SDValue();
+
+  SDNode *SMULLOHI = SRL.getOperand(0).getNode();
+  if (SRL.getOperand(0) != SDValue(SMULLOHI, 0) ||
+      SHL.getOperand(0) != SDValue(SMULLOHI, 1))
+    return SDValue();
+
+  // Now we have:
+  // (or (srl (smul_lohi ?, ?), 16), (shl (smul_lohi ?, ?), 16))
+  // For SMULW[B|T] smul_lohi will take a 32-bit and a 16-bit argument.
+  // For SMULWB the 16-bit value will be sign extended somehow.
+  // For SMULWT only the SRA is required.
+  // Check both operands of the SMUL_LOHI.
+  SDValue OpS16 = SMULLOHI->getOperand(0);
+  SDValue OpS32 = SMULLOHI->getOperand(1);
+
+  SelectionDAG &DAG = DCI.DAG;
+  if (!isS16(OpS16, DAG) && !isSRA16(OpS16)) {
+    OpS16 = OpS32; // Operand 0 did not qualify; try operand 1 instead.
+    OpS32 = SMULLOHI->getOperand(0);
+  }
+
+  SDLoc dl(OR);
+  unsigned Opcode = 0;
+  if (isS16(OpS16, DAG))
+    Opcode = ARMISD::SMULWB;
+  else if (isSRA16(OpS16)) {
+    Opcode = ARMISD::SMULWT;
+    OpS16 = OpS16->getOperand(0); // Use the unshifted source of the SRA.
+  }
+  else
+    return SDValue();
+
+  SDValue Res = DAG.getNode(Opcode, dl, MVT::i32, OpS32, OpS16);
+  DAG.ReplaceAllUsesOfValueWith(SDValue(OR, 0), Res);
+  return SDValue(OR, 0); // Uses were already redirected to Res above.
+}
+
  /// PerformORCombine - Target-specific dag combine xforms for ISD::OR
  static SDValue PerformORCombine(SDNode *N,
                                  TargetLowering::DAGCombinerInfo &DCI,
@@ -9982,6 +10079,8 @@ static SDValue PerformORCombine(SDNode *N,
      // fold (or (select cc, 0, c), x) -> (select cc, x, (or, x, c))
      if (SDValue Result = combineSelectAndUseCommutative(N, false, DCI))
        return Result;
+    if (SDValue Result = PerformORCombineToSMULWBT(N, DCI, Subtarget))
+      return Result;
    }
  
    // The code below optimizes (or (and X, Y), Z).
@@ -11781,6 +11880,20 @@ SDValue ARMTargetLowering::PerformDAGCombine(SDNode *N,
      return PerformVLDCombine(N, DCI);
    case ARMISD::BUILD_VECTOR:
      return PerformARMBUILD_VECTORCombine(N, DCI);
+  case ARMISD::SMULWB: {
+    unsigned BitWidth = N->getValueType(0).getSizeInBits();
+    APInt DemandedMask = APInt::getLowBitsSet(BitWidth, 16);
+    if (SimplifyDemandedBits(N->getOperand(1), DemandedMask, DCI))
+      return SDValue();
+    break;
+  }
+  case ARMISD::SMULWT: {
+    unsigned BitWidth = N->getValueType(0).getSizeInBits();
+    APInt DemandedMask = APInt::getHighBitsSet(BitWidth, 16);
+    if (SimplifyDemandedBits(N->getOperand(1), DemandedMask, DCI))
+      return SDValue();
+    break;
+  }
    case ISD::INTRINSIC_VOID:
    case ISD::INTRINSIC_W_CHAIN:
      switch (cast<ConstantSDNode>(N->getOperand(1))->getZExtValue()) {
diff --git a/lib/Target/ARM/ARMISelLowering.h b/lib/Target/ARM/ARMISelLowering.h

index b1a954c581320dea0c81e681efb3d6b5f81db279..20a678e4c7fe1fd69ff8dbc0245407353a408c4c 100644 (file)
--- a/lib/Target/ARM/ARMISelLowering.h
+++ b/lib/Target/ARM/ARMISelLowering.h
@@ -175,6 +175,8 @@ class InstrItineraryData;
        VMULLs,       // ...signed
        VMULLu,       // ...unsigned
  
+      SMULWB,       // Signed multiply word by half word, bottom
+      SMULWT,       // Signed multiply word by half word, top
        UMLAL,        // 64bit Unsigned Accumulate Multiply
        SMLAL,        // 64bit Signed Accumulate Multiply
        UMAAL,        // 64-bit Unsigned Accumulate Accumulate Multiply
diff --git a/lib/Target/ARM/ARMInstrInfo.td b/lib/Target/ARM/ARMInstrInfo.td

index fa7022abed0005a60d79519dde13d475ae598caa..2211f29a3ec9b39662eeef147fcd814f231e1dd3 100644 (file)
--- a/lib/Target/ARM/ARMInstrInfo.td
+++ b/lib/Target/ARM/ARMInstrInfo.td
@@ -183,6 +183,9 @@ def ARMmemcopy : SDNode<"ARMISD::MEMCPY", SDT_ARMMEMCPY,
                          [SDNPHasChain, SDNPInGlue, SDNPOutGlue,
                           SDNPMayStore, SDNPMayLoad]>;
  
+def ARMsmulwb       : SDNode<"ARMISD::SMULWB", SDTIntBinOp, []>;
+def ARMsmulwt       : SDNode<"ARMISD::SMULWT", SDTIntBinOp, []>;
+
  //===----------------------------------------------------------------------===//
  // ARM Instruction Predicate Definitions.
  //
@@ -4100,13 +4103,13 @@ multiclass AI_smul<string opc> {
  
    def WB : AMulxyI<0b0001001, 0b01, (outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm),
                IIC_iMUL16, !strconcat(opc, "wb"), "\t$Rd, $Rn, $Rm",
-              []>,
+              [(set GPR:$Rd, (ARMsmulwb GPR:$Rn, GPR:$Rm))]>,
             Requires<[IsARM, HasV5TE]>,
             Sched<[WriteMUL16, ReadMUL, ReadMUL]>;
  
    def WT : AMulxyI<0b0001001, 0b11, (outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm),
                IIC_iMUL16, !strconcat(opc, "wt"), "\t$Rd, $Rn, $Rm",
-              []>,
+              [(set GPR:$Rd, (ARMsmulwt GPR:$Rn, GPR:$Rm))]>,
              Requires<[IsARM, HasV5TE]>,
             Sched<[WriteMUL16, ReadMUL, ReadMUL]>;
  }
@@ -4153,14 +4156,16 @@ multiclass AI_smla<string opc> {
    def WB : AMulxyIa<0b0001001, 0b00, (outs GPRnopc:$Rd),
                (ins GPRnopc:$Rn, GPRnopc:$Rm, GPR:$Ra),
                IIC_iMAC16, !strconcat(opc, "wb"), "\t$Rd, $Rn, $Rm, $Ra",
-              []>,
+              [(set GPRnopc:$Rd,
+                    (add GPR:$Ra, (ARMsmulwb GPRnopc:$Rn, GPRnopc:$Rm)))]>,
             Requires<[IsARM, HasV5TE, UseMulOps]>,
             Sched<[WriteMAC16, ReadMUL, ReadMUL, ReadMAC]>;
  
    def WT : AMulxyIa<0b0001001, 0b10, (outs GPRnopc:$Rd),
                (ins GPRnopc:$Rn, GPRnopc:$Rm, GPR:$Ra),
                IIC_iMAC16, !strconcat(opc, "wt"), "\t$Rd, $Rn, $Rm, $Ra",
-              []>,
+              [(set GPRnopc:$Rd,
+                    (add GPR:$Ra, (ARMsmulwt GPRnopc:$Rn, GPRnopc:$Rm)))]>,
             Requires<[IsARM, HasV5TE, UseMulOps]>,
             Sched<[WriteMAC16, ReadMUL, ReadMUL, ReadMAC]>;
    }
diff --git a/lib/Target/ARM/ARMInstrThumb2.td b/lib/Target/ARM/ARMInstrThumb2.td

index bbcc3b0f50b424b44c1968fe00bdd05b28c4c8bd..4059356659b109a7a082fac32005bc5db36e9490 100644 (file)
--- a/lib/Target/ARM/ARMInstrThumb2.td
+++ b/lib/Target/ARM/ARMInstrThumb2.td
@@ -2676,8 +2676,10 @@ def t2SMULTB : T2ThreeRegSMUL<0b001, 0b10, "smultb",
  def t2SMULTT : T2ThreeRegSMUL<0b001, 0b11, "smultt",
               [(set rGPR:$Rd, (mul (sra rGPR:$Rn, (i32 16)),
                                     (sra rGPR:$Rm, (i32 16))))]>;
-def t2SMULWB : T2ThreeRegSMUL<0b011, 0b00, "smulwb", []>;
-def t2SMULWT : T2ThreeRegSMUL<0b011, 0b01, "smulwt", []>;
+def t2SMULWB : T2ThreeRegSMUL<0b011, 0b00, "smulwb",
+             [(set rGPR:$Rd, (ARMsmulwb rGPR:$Rn, rGPR:$Rm))]>;
+def t2SMULWT : T2ThreeRegSMUL<0b011, 0b01, "smulwt",
+             [(set rGPR:$Rd, (ARMsmulwt rGPR:$Rn, rGPR:$Rm))]>;
  
  def : Thumb2DSPPat<(mul sext_16_node:$Rm, sext_16_node:$Rn),
                     (t2SMULBB rGPR:$Rm, rGPR:$Rn)>;
@@ -2712,8 +2714,10 @@ def t2SMLATB : T2FourRegSMLA<0b001, 0b10, "smlatb",
  def t2SMLATT : T2FourRegSMLA<0b001, 0b11, "smlatt",
               [(set rGPR:$Rd, (add rGPR:$Ra, (mul (sra rGPR:$Rn, (i32 16)),
                                                   (sra rGPR:$Rm, (i32 16)))))]>;
-def t2SMLAWB : T2FourRegSMLA<0b011, 0b00, "smlawb", []>;
-def t2SMLAWT : T2FourRegSMLA<0b011, 0b01, "smlawt", []>;
+def t2SMLAWB : T2FourRegSMLA<0b011, 0b00, "smlawb",
+             [(set rGPR:$Rd, (add rGPR:$Ra, (ARMsmulwb rGPR:$Rn, rGPR:$Rm)))]>;
+def t2SMLAWT : T2FourRegSMLA<0b011, 0b01, "smlawt",
+             [(set rGPR:$Rd, (add rGPR:$Ra, (ARMsmulwt rGPR:$Rn, rGPR:$Rm)))]>;
  
  def : Thumb2DSPMulPat<(add rGPR:$Ra, (mul sext_16_node:$Rn, sext_16_node:$Rm)),
                        (t2SMLABB rGPR:$Rn, rGPR:$Rm, rGPR:$Ra)>;
diff --git a/test/CodeGen/ARM/smul.ll b/test/CodeGen/ARM/smul.ll

index 3c187aa846d54c08f77b96d9a0b4ab17b61f17c9..2b7be41ddb24e6df0ce7ebd3e40258fc456f731d 100644 (file)
--- a/test/CodeGen/ARM/smul.ll
+++ b/test/CodeGen/ARM/smul.ll
@@ -262,3 +262,32 @@ define i32 @f21(i32 %a, i32 %x, i16 %y) {
          %tmp5 = add i32 %a, %tmp4
          ret i32 %tmp5
  }
+
+@global_b = external global i16, align 2
+
+define i32 @f22(i32 %a) {
+; CHECK-LABEL: f22:
+; CHECK: smulwb r0, r0, r1
+; CHECK-THUMBV6-NOT: smulwb
+        %b = load i16, i16* @global_b, align 2
+        %sext = sext i16 %b to i64
+        %conv = sext i32 %a to i64
+        %mul = mul nsw i64 %sext, %conv
+        %shr37 = lshr i64 %mul, 16
+        %conv4 = trunc i64 %shr37 to i32
+        ret i32 %conv4
+}
+
+define i32 @f23(i32 %a, i32 %c) {
+; CHECK-LABEL: f23:
+; CHECK: smlawb r0, r0, r2, r1
+; CHECK-THUMBV6-NOT: smlawb
+        %b = load i16, i16* @global_b, align 2
+        %sext = sext i16 %b to i64
+        %conv = sext i32 %a to i64
+        %mul = mul nsw i64 %sext, %conv
+        %shr49 = lshr i64 %mul, 16
+        %conv5 = trunc i64 %shr49 to i32
+        %add = add nsw i32 %conv5, %c
+        ret i32 %add
+}
author	Sam Parker <sam.parker@arm.com>
	Tue, 14 Mar 2017 09:13:22 +0000 (09:13 +0000)
committer	Sam Parker <sam.parker@arm.com>
	Tue, 14 Mar 2017 09:13:22 +0000 (09:13 +0000)
include/llvm/Target/TargetLowering.h		patch \| blob \| history
lib/CodeGen/SelectionDAG/TargetLowering.cpp		patch \| blob \| history
lib/Target/ARM/ARMISelDAGToDAG.cpp		patch \| blob \| history
lib/Target/ARM/ARMISelLowering.cpp		patch \| blob \| history
lib/Target/ARM/ARMISelLowering.h		patch \| blob \| history
lib/Target/ARM/ARMInstrInfo.td		patch \| blob \| history
lib/Target/ARM/ARMInstrThumb2.td		patch \| blob \| history
test/CodeGen/ARM/smul.ll		patch \| blob \| history