[Power9] Added support for the modsw, moduw, modsd, modud hardware instructions.

author Tony Jiang <jtony@ca.ibm.com>

Mon, 12 Jun 2017 17:58:42 +0000 (17:58 +0000)

committer Tony Jiang <jtony@ca.ibm.com>

Mon, 12 Jun 2017 17:58:42 +0000 (17:58 +0000)
author Tony Jiang <jtony@ca.ibm.com>
Mon, 12 Jun 2017 17:58:42 +0000 (17:58 +0000)
committer Tony Jiang <jtony@ca.ibm.com>
Mon, 12 Jun 2017 17:58:42 +0000 (17:58 +0000)
diff --git a/lib/Target/PowerPC/PPCISelLowering.cpp b/lib/Target/PowerPC/PPCISelLowering.cpp

index c78d3fa45c5226b281c3d78bab6a6b10cee19c58..8880e9d88fb3a56a750ee356fe0bf227ca32c361 100644 (file)
--- a/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -204,11 +204,23 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM,
    setOperationAction(ISD::FNEARBYINT, MVT::ppcf128, Expand);
    setOperationAction(ISD::FREM, MVT::ppcf128, Expand);
  
-  // PowerPC has no SREM/UREM instructions
-  setOperationAction(ISD::SREM, MVT::i32, Expand);
-  setOperationAction(ISD::UREM, MVT::i32, Expand);
-  setOperationAction(ISD::SREM, MVT::i64, Expand);
-  setOperationAction(ISD::UREM, MVT::i64, Expand);
+  // PowerPC has no SREM/UREM instructions unless we are on P9.
+  // On P9 we may use a hardware instruction to compute the remainder.
+  // The instructions are not legalized directly because in the cases where the
+  // result of both the remainder and the division is required it is more
+  // efficient to compute the remainder from the result of the division rather
+  // than use the remainder instruction.
+  if (Subtarget.isISA3_0()) {
+    setOperationAction(ISD::SREM, MVT::i32, Custom);
+    setOperationAction(ISD::UREM, MVT::i32, Custom);
+    setOperationAction(ISD::SREM, MVT::i64, Custom);
+    setOperationAction(ISD::UREM, MVT::i64, Custom);
+  } else {
+    setOperationAction(ISD::SREM, MVT::i32, Expand);
+    setOperationAction(ISD::UREM, MVT::i32, Expand);
+    setOperationAction(ISD::SREM, MVT::i64, Expand);
+    setOperationAction(ISD::UREM, MVT::i64, Expand);
+  }
  
    // Don't use SMUL_LOHI/UMUL_LOHI or SDIVREM/UDIVREM to lower SREM/UREM.
    setOperationAction(ISD::UMUL_LOHI, MVT::i32, Expand);
@@ -8394,6 +8406,18 @@ SDValue PPCTargetLowering::LowerINTRINSIC_VOID(SDValue Op,
    return SDValue();
  }
  
+SDValue PPCTargetLowering::LowerREM(SDValue Op, SelectionDAG &DAG) const {
+  // Look among the divisor's users for a same-signedness DIV of both operands.
+  for (auto UI : Op.getOperand(1)->uses()) {
+    if ((Op.getOpcode() == ISD::SREM && UI->getOpcode() == ISD::SDIV) ||
+        (Op.getOpcode() == ISD::UREM && UI->getOpcode() == ISD::UDIV))
+      if (UI->getOperand(0) == Op.getOperand(0) &&
+          UI->getOperand(1) == Op.getOperand(1))
+        return SDValue(); // DIV found: decline so the REM is expanded.
+  }
+  return Op; // No such DIV: keep the REM so a P9 mod instruction is selected.
+}
+
  SDValue PPCTargetLowering::LowerSIGN_EXTEND_INREG(SDValue Op,
                                                    SelectionDAG &DAG) const {
    SDLoc dl(Op);
@@ -8862,6 +8886,9 @@ SDValue PPCTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
  
    case ISD::INTRINSIC_VOID:
      return LowerINTRINSIC_VOID(Op, DAG);
+  case ISD::SREM:
+  case ISD::UREM:
+    return LowerREM(Op, DAG);
    }
  }
  
diff --git a/lib/Target/PowerPC/PPCISelLowering.h b/lib/Target/PowerPC/PPCISelLowering.h

index 7982a4a9e9fb73eeab481d28340c4432300944dd..678d92898a0e6d126424dd9672c859e30af11d96 100644 (file)
--- a/lib/Target/PowerPC/PPCISelLowering.h
+++ b/lib/Target/PowerPC/PPCISelLowering.h
@@ -918,6 +918,7 @@ namespace llvm {
      SDValue LowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const;
      SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) const;
      SDValue LowerINTRINSIC_VOID(SDValue Op, SelectionDAG &DAG) const;
+    SDValue LowerREM(SDValue Op, SelectionDAG &DAG) const;
      SDValue LowerSCALAR_TO_VECTOR(SDValue Op, SelectionDAG &DAG) const;
      SDValue LowerSIGN_EXTEND_INREG(SDValue Op, SelectionDAG &DAG) const;
      SDValue LowerMUL(SDValue Op, SelectionDAG &DAG) const;
diff --git a/lib/Target/PowerPC/PPCInstr64Bit.td b/lib/Target/PowerPC/PPCInstr64Bit.td

index 295590b2acf624c0fc2f3d41da8fa43095a75751..70536a6039b82a33761ad0f72b34f404187c12bd 100644 (file)
--- a/lib/Target/PowerPC/PPCInstr64Bit.td
+++ b/lib/Target/PowerPC/PPCInstr64Bit.td
@@ -683,6 +683,16 @@ def DIVDE : XOForm_1<31, 425, 0, (outs g8rc:$rT), (ins g8rc:$rA, g8rc:$rB),
                       "divde $rT, $rA, $rB", IIC_IntDivD,
                       [(set i64:$rT, (int_ppc_divde g8rc:$rA, g8rc:$rB))]>,
                       isPPC64, Requires<[HasExtDiv]>;
+
+let Predicates = [IsISA3_0] in { // 64-bit signed/unsigned modulo.
+def MODSD : XForm_8<31, 777, (outs g8rc:$rT), (ins g8rc:$rA, g8rc:$rB),
+                        "modsd $rT, $rA, $rB", IIC_IntDivW,
+                        [(set i64:$rT, (srem i64:$rA, i64:$rB))]>;
+def MODUD : XForm_8<31, 265, (outs g8rc:$rT), (ins g8rc:$rA, g8rc:$rB),
+                        "modud $rT, $rA, $rB", IIC_IntDivW,
+                        [(set i64:$rT, (urem i64:$rA, i64:$rB))]>;
+} // NOTE(review): IIC_IntDivW on i64 ops; DIVDE uses IIC_IntDivD - confirm.
+
  let Defs = [CR0] in
  def DIVDEo : XOForm_1<31, 425, 0, (outs g8rc:$rT), (ins g8rc:$rA, g8rc:$rB),
                        "divde. $rT, $rA, $rB", IIC_IntDivD,
diff --git a/lib/Target/PowerPC/PPCInstrInfo.td b/lib/Target/PowerPC/PPCInstrInfo.td

index 8223aa655e38bf6205cbf76534b3a4980425f111..37506239c58d9dd5e92dc8099f7116358fa91878 100644 (file)
--- a/lib/Target/PowerPC/PPCInstrInfo.td
+++ b/lib/Target/PowerPC/PPCInstrInfo.td
@@ -2544,6 +2544,14 @@ let Uses = [RM] in {
                        "mffs. $rT", IIC_IntMFFS, []>, isDOT;
  }
  
+let Predicates = [IsISA3_0] in { // 32-bit signed/unsigned modulo.
+def MODSW : XForm_8<31, 779, (outs gprc:$rT), (ins gprc:$rA, gprc:$rB),
+                        "modsw $rT, $rA, $rB", IIC_IntDivW,
+                        [(set i32:$rT, (srem i32:$rA, i32:$rB))]>;
+def MODUW : XForm_8<31, 267, (outs gprc:$rT), (ins gprc:$rA, gprc:$rB),
+                        "moduw $rT, $rA, $rB", IIC_IntDivW,
+                        [(set i32:$rT, (urem i32:$rA, i32:$rB))]>;
+}
  
  let PPC970_Unit = 1, hasSideEffects = 0 in {  // FXU Operations.
  // XO-Form instructions.  Arithmetic instructions that can set overflow bit
diff --git a/test/CodeGen/PowerPC/ppc64-P9-mod.ll b/test/CodeGen/PowerPC/ppc64-P9-mod.ll

new file mode 100644 (file)

index 0000000..46e347b
--- /dev/null
+++ b/test/CodeGen/PowerPC/ppc64-P9-mod.ll
@@ -0,0 +1,263 @@
+; RUN: llc < %s -mtriple=powerpc64le-unknown-linux-gnu -mcpu=pwr9 -verify-machineinstrs | FileCheck %s
+; RUN: llc < %s -mtriple=powerpc64-unknown-linux-gnu -mcpu=pwr9 -verify-machineinstrs | FileCheck %s
+; RUN: llc < %s -mtriple=powerpc64le-unknown-linux-gnu -mcpu=pwr8 | FileCheck %s -check-prefix=CHECK-PWR8 -implicit-check-not mod[us][wd]
+
+@mod_resultsw = common local_unnamed_addr global i32 0, align 4
+@mod_resultud = common local_unnamed_addr global i64 0, align 8
+@div_resultsw = common local_unnamed_addr global i32 0, align 4
+@mod_resultuw = common local_unnamed_addr global i32 0, align 4
+@div_resultuw = common local_unnamed_addr global i32 0, align 4
+@div_resultsd = common local_unnamed_addr global i64 0, align 8
+@mod_resultsd = common local_unnamed_addr global i64 0, align 8
+@div_resultud = common local_unnamed_addr global i64 0, align 8
+
+; Function Attrs: norecurse nounwind
+define void @modulo_sw(i32 signext %a, i32 signext %b) local_unnamed_addr {
+entry:
+  %rem = srem i32 %a, %b
+  store i32 %rem, i32* @mod_resultsw, align 4
+  ret void
+; CHECK-LABEL: modulo_sw
+; CHECK: modsw {{[0-9]+}}, 3, 4
+; CHECK: blr
+; CHECK-PWR8-LABEL: modulo_sw
+; CHECK-PWR8: div
+; CHECK-PWR8: mull
+; CHECK-PWR8: sub
+; CHECK-PWR8: blr
+}
+
+; Function Attrs: norecurse nounwind readnone
+define zeroext i32 @modulo_uw(i32 zeroext %a, i32 zeroext %b) local_unnamed_addr {
+entry:
+  %rem = urem i32 %a, %b
+  ret i32 %rem
+; CHECK-LABEL: modulo_uw
+; CHECK: moduw {{[0-9]+}}, 3, 4
+; CHECK: blr
+; CHECK-PWR8-LABEL: modulo_uw
+; CHECK-PWR8: div
+; CHECK-PWR8: mull
+; CHECK-PWR8: sub
+; CHECK-PWR8: blr
+}
+
+; Function Attrs: norecurse nounwind readnone
+define i64 @modulo_sd(i64 %a, i64 %b) local_unnamed_addr {
+entry:
+  %rem = srem i64 %a, %b
+  ret i64 %rem
+; CHECK-LABEL: modulo_sd
+; CHECK: modsd {{[0-9]+}}, 3, 4
+; CHECK: blr
+; CHECK-PWR8-LABEL: modulo_sd
+; CHECK-PWR8: div
+; CHECK-PWR8: mull
+; CHECK-PWR8: sub
+; CHECK-PWR8: blr
+}
+
+; Function Attrs: norecurse nounwind
+define void @modulo_ud(i64 %a, i64 %b) local_unnamed_addr {
+entry:
+  %rem = urem i64 %a, %b
+  store i64 %rem, i64* @mod_resultud, align 8
+  ret void
+; CHECK-LABEL: modulo_ud
+; CHECK: modud {{[0-9]+}}, 3, 4
+; CHECK: blr
+; CHECK-PWR8-LABEL: modulo_ud
+; CHECK-PWR8: div
+; CHECK-PWR8: mull
+; CHECK-PWR8: sub
+; CHECK-PWR8: blr
+}
+
+; Function Attrs: norecurse nounwind
+define void @modulo_div_sw(i32 signext %a, i32 signext %b) local_unnamed_addr {
+entry:
+  %rem = srem i32 %a, %b
+  store i32 %rem, i32* @mod_resultsw, align 4
+  %div = sdiv i32 %a, %b
+  store i32 %div, i32* @div_resultsw, align 4
+  ret void
+; CHECK-LABEL: modulo_div_sw
+; CHECK-NOT: modsw
+; CHECK: div
+; CHECK-NOT: modsw
+; CHECK: mull
+; CHECK-NOT: modsw
+; CHECK: sub
+; CHECK: blr
+; CHECK-PWR8-LABEL: modulo_div_sw
+; CHECK-PWR8: div
+; CHECK-PWR8: mull
+; CHECK-PWR8: sub
+; CHECK-PWR8: blr
+}
+
+; Function Attrs: norecurse nounwind
+define void @modulo_div_abc_sw(i32 signext %a, i32 signext %b, i32 signext %c) local_unnamed_addr {
+entry:
+  %rem = srem i32 %a, %c
+  store i32 %rem, i32* @mod_resultsw, align 4
+  %div = sdiv i32 %b, %c
+  store i32 %div, i32* @div_resultsw, align 4
+  ret void
+; CHECK-LABEL: modulo_div_abc_sw
+; CHECK: modsw {{[0-9]+}}, 3, 5
+; CHECK: blr
+; CHECK-PWR8-LABEL: modulo_div_abc_sw
+; CHECK-PWR8: div
+; CHECK-PWR8: mull
+; CHECK-PWR8: sub
+; CHECK-PWR8: blr
+}
+
+; Function Attrs: norecurse nounwind
+define void @modulo_div_uw(i32 zeroext %a, i32 zeroext %b) local_unnamed_addr {
+entry:
+  %rem = urem i32 %a, %b
+  store i32 %rem, i32* @mod_resultuw, align 4
+  %div = udiv i32 %a, %b
+  store i32 %div, i32* @div_resultuw, align 4
+  ret void
+; CHECK-LABEL: modulo_div_uw
+; CHECK-NOT: moduw
+; CHECK: div
+; CHECK-NOT: moduw
+; CHECK: mull
+; CHECK-NOT: moduw
+; CHECK: sub
+; CHECK: blr
+; CHECK-PWR8-LABEL: modulo_div_uw
+; CHECK-PWR8: div
+; CHECK-PWR8: mull
+; CHECK-PWR8: sub
+; CHECK-PWR8: blr
+}
+
+; Function Attrs: norecurse nounwind
+define void @modulo_div_swuw(i32 signext %a, i32 signext %b) local_unnamed_addr {
+entry:
+  %rem = srem i32 %a, %b
+  store i32 %rem, i32* @mod_resultsw, align 4
+  %div = udiv i32 %a, %b
+  store i32 %div, i32* @div_resultsw, align 4
+  ret void
+; CHECK-LABEL: modulo_div_swuw
+; CHECK: modsw {{[0-9]+}}, 3, 4
+; CHECK: blr
+; CHECK-PWR8-LABEL: modulo_div_swuw
+; CHECK-PWR8: div
+; CHECK-PWR8: mull
+; CHECK-PWR8: sub
+; CHECK-PWR8: blr
+}
+
+; Function Attrs: norecurse nounwind
+define void @modulo_div_udsd(i64 %a, i64 %b) local_unnamed_addr {
+entry:
+  %rem = urem i64 %a, %b
+  store i64 %rem, i64* @mod_resultud, align 8
+  %div = sdiv i64 %a, %b
+  store i64 %div, i64* @div_resultsd, align 8
+  ret void
+; CHECK-LABEL: modulo_div_udsd
+; CHECK: modud {{[0-9]+}}, 3, 4
+; CHECK: blr
+; CHECK-PWR8-LABEL: modulo_div_udsd
+; CHECK-PWR8: div
+; CHECK-PWR8: mull
+; CHECK-PWR8: sub
+; CHECK-PWR8: blr
+}
+
+; Function Attrs: norecurse nounwind
+define void @modulo_const32_sw(i32 signext %a) local_unnamed_addr {
+entry:
+  %rem = srem i32 %a, 32
+  store i32 %rem, i32* @mod_resultsw, align 4
+  ret void
+; CHECK-LABEL: modulo_const32_sw
+; CHECK-NOT: modsw
+; CHECK: srawi
+; CHECK-NOT: modsw
+; CHECK: addze
+; CHECK-NOT: modsw
+; CHECK: slwi
+; CHECK-NOT: modsw
+; CHECK: subf
+; CHECK-NOT: modsw
+; CHECK: blr
+; CHECK-PWR8-LABEL: modulo_const32_sw
+; CHECK-PWR8: srawi
+; CHECK-PWR8: addze
+; CHECK-PWR8: slwi
+; CHECK-PWR8: subf
+; CHECK-PWR8: blr
+}
+
+; Function Attrs: norecurse nounwind readnone
+define signext i32 @modulo_const3_sw(i32 signext %a) local_unnamed_addr {
+entry:
+  %rem = srem i32 %a, 3
+  ret i32 %rem
+; CHECK-LABEL: modulo_const3_sw
+; CHECK-NOT: modsw
+; CHECK: mull
+; CHECK-NOT: modsw
+; CHECK: sub
+; CHECK-NOT: modsw
+; CHECK: blr
+; CHECK-PWR8-LABEL: modulo_const3_sw
+; CHECK-PWR8: mull
+; CHECK-PWR8: sub
+; CHECK-PWR8: blr
+}
+
+; Function Attrs: norecurse nounwind readnone
+define signext i32 @const2_modulo_sw(i32 signext %a) local_unnamed_addr {
+entry:
+  %rem = srem i32 2, %a
+  ret i32 %rem
+; CHECK-LABEL: const2_modulo_sw
+; CHECK: modsw {{[0-9]+}}, {{[0-9]+}}, 3
+; CHECK: blr
+; CHECK-PWR8-LABEL: const2_modulo_sw
+; CHECK-PWR8: div
+; CHECK-PWR8: mull
+; CHECK-PWR8: sub
+; CHECK-PWR8: blr
+}
+
+; Function Attrs: norecurse nounwind
+; FIXME: On Power9 this test will still produce modsw because the divide is in
+; a different block than the remainder. Due to the nature of the SDAG we cannot
+; see the div in the other block.
+define void @blocks_modulo_div_sw(i32 signext %a, i32 signext %b, i32 signext %c) local_unnamed_addr {
+entry:
+  %div = sdiv i32 %a, %b
+  store i32 %div, i32* @div_resultsw, align 4
+  %cmp = icmp sgt i32 %c, 0
+  br i1 %cmp, label %if.then, label %if.end
+
+if.then:                                          ; preds = %entry
+  %rem = srem i32 %a, %b
+  store i32 %rem, i32* @mod_resultsw, align 4
+  br label %if.end
+
+if.end:                                           ; preds = %if.then, %entry
+  ret void
+; CHECK-LABEL: blocks_modulo_div_sw
+; CHECK: div
+; CHECK: modsw {{[0-9]+}}, 3, 4
+; CHECK: blr
+; CHECK-PWR8-LABEL: blocks_modulo_div_sw
+; CHECK-PWR8: div
+; CHECK-PWR8: mull
+; CHECK-PWR8: sub
+; CHECK-PWR8: blr
+}
+
+
diff --git a/test/MC/Disassembler/PowerPC/ppc64-encoding.txt b/test/MC/Disassembler/PowerPC/ppc64-encoding.txt

index a6d079297bcf8eae3f369cd62011aff7bcf09c68..25ed35fcb1c08e80519a114d2e145bf8ebc76b85 100644 (file)
--- a/test/MC/Disassembler/PowerPC/ppc64-encoding.txt
+++ b/test/MC/Disassembler/PowerPC/ppc64-encoding.txt
@@ -352,6 +352,18 @@
  # CHECK: divweu. 2, 3, 4
  0x7c 0x43 0x23 0x17
  
+# CHECK: modsw 2, 3, 4
+0x7c 0x43 0x26 0x16
+
+# CHECK: moduw 2, 3, 4
+0x7c 0x43 0x22 0x16
+
+# CHECK: modsd 2, 3, 4
+0x7c 0x43 0x26 0x12
+
+# CHECK: modud 2, 3, 4
+0x7c 0x43 0x22 0x12
+
  # CHECK: mulld 2, 3, 4                   
  0x7c 0x43 0x21 0xd2
  
diff --git a/test/MC/Disassembler/PowerPC/ppc64le-encoding.txt b/test/MC/Disassembler/PowerPC/ppc64le-encoding.txt

index 9ddc286d8aaa155be2c57a1c2dd76bfb7c1639a9..9dc994010551e3d4d1a8e4b0dd5ddc11edfdd88d 100644 (file)
--- a/test/MC/Disassembler/PowerPC/ppc64le-encoding.txt
+++ b/test/MC/Disassembler/PowerPC/ppc64le-encoding.txt
@@ -349,6 +349,18 @@
  # CHECK: divweu. 2, 3, 4
  0x17 0x23 0x43 0x7c
  
+# CHECK: modsw 2, 3, 4
+0x16 0x26 0x43 0x7c
+
+# CHECK: moduw 2, 3, 4
+0x16 0x22 0x43 0x7c
+
+# CHECK: modsd 2, 3, 4
+0x12 0x26 0x43 0x7c
+
+# CHECK: modud 2, 3, 4
+0x12 0x22 0x43 0x7c
+
  # CHECK: mulld 2, 3, 4
  0xd2 0x21 0x43 0x7c
  
diff --git a/test/MC/PowerPC/ppc64-encoding.s b/test/MC/PowerPC/ppc64-encoding.s

index a772ca44986c5248181a18222a27ee5b27a6d20e..237dd5cfd72752aecdfee6626abe8ee4ba30a0cc 100644 (file)
--- a/test/MC/PowerPC/ppc64-encoding.s
+++ b/test/MC/PowerPC/ppc64-encoding.s
@@ -493,6 +493,19 @@
  # FIXME:    divweuo 2, 3, 4
  # FIXME:    divweuo. 2, 3, 4
  
+# CHECK-BE: modsw 2, 3, 4                   # encoding: [0x7c,0x43,0x26,0x16]
+# CHECK-LE: modsw 2, 3, 4                   # encoding: [0x16,0x26,0x43,0x7c]
+            modsw 2, 3, 4
+# CHECK-BE: moduw 2, 3, 4                   # encoding: [0x7c,0x43,0x22,0x16]
+# CHECK-LE: moduw 2, 3, 4                   # encoding: [0x16,0x22,0x43,0x7c]
+            moduw 2, 3, 4
+# CHECK-BE: modsd 2, 3, 4                   # encoding: [0x7c,0x43,0x26,0x12]
+# CHECK-LE: modsd 2, 3, 4                   # encoding: [0x12,0x26,0x43,0x7c]
+            modsd 2, 3, 4
+# CHECK-BE: modud 2, 3, 4                   # encoding: [0x7c,0x43,0x22,0x12]
+# CHECK-LE: modud 2, 3, 4                   # encoding: [0x12,0x22,0x43,0x7c]
+            modud 2, 3, 4
+
  # CHECK-BE: mulld 2, 3, 4                   # encoding: [0x7c,0x43,0x21,0xd2]
  # CHECK-LE: mulld 2, 3, 4                   # encoding: [0xd2,0x21,0x43,0x7c]
              mulld 2, 3, 4
author	Tony Jiang <jtony@ca.ibm.com>
	Mon, 12 Jun 2017 17:58:42 +0000 (17:58 +0000)
committer	Tony Jiang <jtony@ca.ibm.com>
	Mon, 12 Jun 2017 17:58:42 +0000 (17:58 +0000)
lib/Target/PowerPC/PPCISelLowering.cpp		patch \| blob \| history
lib/Target/PowerPC/PPCISelLowering.h		patch \| blob \| history
lib/Target/PowerPC/PPCInstr64Bit.td		patch \| blob \| history
lib/Target/PowerPC/PPCInstrInfo.td		patch \| blob \| history
test/CodeGen/PowerPC/ppc64-P9-mod.ll	[new file with mode: 0644]	patch \| blob
test/MC/Disassembler/PowerPC/ppc64-encoding.txt		patch \| blob \| history
test/MC/Disassembler/PowerPC/ppc64le-encoding.txt		patch \| blob \| history
test/MC/PowerPC/ppc64-encoding.s		patch \| blob \| history