From: Krzysztof Parzyszek <kparzysz@codeaurora.org>
Date: Thu, 9 Mar 2017 16:29:30 +0000 (+0000)
Subject: [Hexagon] Propagate zext of i1 into arithmetic code in selection DAG
X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=e5673c385915af68931fa3c0a32890564e29850f;p=llvm

[Hexagon] Propagate zext of i1 into arithmetic code in selection DAG

(op ... (zext i1 c) ...) -> (select c (op ... 1 ...),
                                      (op ... 0 ...))


git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@297391 91177308-0d34-0410-b5e6-96231b3b80d8
---

diff --git a/lib/Target/Hexagon/HexagonISelDAGToDAG.cpp b/lib/Target/Hexagon/HexagonISelDAGToDAG.cpp
index 06854ba5e09..632f2a3065f 100644
--- a/lib/Target/Hexagon/HexagonISelDAGToDAG.cpp
+++ b/lib/Target/Hexagon/HexagonISelDAGToDAG.cpp
@@ -976,6 +976,47 @@ SelectInlineAsmMemoryOperand(const SDValue &Op, unsigned ConstraintID,
 }
 
 
+static bool isMemOPCandidate(SDNode *I, SDNode *U) {
+  // I is an operand of U. Check if U is an arithmetic (binary) operation
+  // usable in a memop, where the other operand is a loaded value, and the
+  // result of U is stored in the same location.
+
+  if (!U->hasOneUse())
+    return false;
+  unsigned Opc = U->getOpcode();
+  switch (Opc) {
+    case ISD::ADD:
+    case ISD::SUB:
+    case ISD::AND:
+    case ISD::OR:
+      break;
+    default:
+      return false;
+  }
+
+  SDValue S0 = U->getOperand(0);
+  SDValue S1 = U->getOperand(1);
+  SDValue SY = (S0.getNode() == I) ? S1 : S0;
+
+  SDNode *UUse = *U->use_begin();
+  if (UUse->getNumValues() != 1)
+    return false;
+
+  // Check if one of the inputs to U is a load instruction and the output
+  // is used by a store instruction. If so and they also have the same
+  // base pointer, then don't preoprocess this node sequence as it
+  // can be matched to a memop.
+  SDNode *SYNode = SY.getNode();
+  if (UUse->getOpcode() == ISD::STORE && SYNode->getOpcode() == ISD::LOAD) {
+    SDValue LDBasePtr = cast<MemSDNode>(SYNode)->getBasePtr();
+    SDValue STBasePtr = cast<MemSDNode>(UUse)->getBasePtr();
+    if (LDBasePtr == STBasePtr)
+      return true;
+  }
+  return false;
+}
+
+
 void HexagonDAGToDAGISel::PreprocessISelDAG() {
   SelectionDAG &DAG = *CurDAG;
   std::vector<SDNode*> Nodes;
@@ -1148,6 +1189,61 @@ void HexagonDAGToDAGISel::PreprocessISelDAG() {
     ReplaceNode(T0.getNode(), NewShl.getNode());
   }
 
+  // Transform (op (zext (i1 c) t)
+  //        to (select c (op 0 t) (op 1 t))
+  for (SDNode *N : Nodes) {
+    unsigned Opc = N->getOpcode();
+    if (Opc != ISD::ZERO_EXTEND)
+      continue;
+    SDValue OpI1 = N->getOperand(0);
+    EVT OpVT = OpI1.getValueType();
+    if (!OpVT.isSimple() || OpVT.getSimpleVT() != MVT::i1)
+      continue;
+    for (auto I = N->use_begin(), E = N->use_end(); I != E; ++I) {
+      SDNode *U = *I;
+      if (U->getNumValues() != 1)
+        continue;
+      EVT UVT = U->getValueType(0);
+      if (!UVT.isSimple() || !UVT.isInteger() || UVT.getSimpleVT() == MVT::i1)
+        continue;
+      if (isMemOPCandidate(N, U))
+        continue;
+
+      // Potentially simplifiable operation.
+      unsigned I1N = I.getOperandNo();
+      SmallVector<SDValue,2> Ops(U->getNumOperands());
+      for (unsigned i = 0, n = U->getNumOperands(); i != n; ++i)
+        Ops[i] = U->getOperand(i);
+      EVT BVT = Ops[I1N].getValueType();
+
+      SDLoc dl(U);
+      SDValue C0 = DAG.getConstant(0, dl, BVT);
+      SDValue C1 = DAG.getConstant(1, dl, BVT);
+      SDValue If0, If1;
+
+      if (isa<MachineSDNode>(U)) {
+        unsigned UseOpc = U->getMachineOpcode();
+        Ops[I1N] = C0;
+        If0 = SDValue(DAG.getMachineNode(UseOpc, dl, UVT, Ops), 0);
+        Ops[I1N] = C1;
+        If1 = SDValue(DAG.getMachineNode(UseOpc, dl, UVT, Ops), 0);
+      } else {
+        unsigned UseOpc = U->getOpcode();
+        Ops[I1N] = C0;
+        If0 = DAG.getNode(UseOpc, dl, UVT, Ops);
+        Ops[I1N] = C1;
+        If1 = DAG.getNode(UseOpc, dl, UVT, Ops);
+      }
+      SDValue Sel = DAG.getNode(ISD::SELECT, dl, UVT, OpI1, If1, If0);
+      DAG.ReplaceAllUsesWith(U, Sel.getNode());
+    }
+  }
+
+  DEBUG_WITH_TYPE("isel", {
+    dbgs() << "Preprocessed (Hexagon) selection DAG:";
+    CurDAG->dump();
+  });
+
   if (EnableAddressRebalancing) {
     rebalanceAddressTrees();
 
diff --git a/test/CodeGen/Hexagon/adde.ll b/test/CodeGen/Hexagon/adde.ll
index 5af3b071cd0..12913eea7e8 100644
--- a/test/CodeGen/Hexagon/adde.ll
+++ b/test/CodeGen/Hexagon/adde.ll
@@ -1,33 +1,27 @@
-; RUN: llc -march=hexagon -disable-hsdr -hexagon-expand-condsets=0 -hexagon-bit=0 -disable-post-ra < %s | FileCheck %s
+; RUN: llc -march=hexagon -hexagon-expand-condsets=0 < %s | FileCheck %s
 
-; CHECK: r{{[0-9]+:[0-9]+}} = add(r{{[0-9]+:[0-9]+}},r{{[0-9]+:[0-9]+}})
-; CHECK: r{{[0-9]+:[0-9]+}} = combine(#0,#1)
-; CHECK: p{{[0-9]+}} = cmp.gtu(r{{[0-9]+:[0-9]+}},r{{[0-9]+:[0-9]+}})
-; CHECK: p{{[0-9]+}} = cmp.gtu(r{{[0-9]+:[0-9]+}},r{{[0-9]+:[0-9]+}})
-; CHECK: r{{[0-9]+}} = mux(p{{[0-9]+}},#1,#0)
-; CHECK: r{{[0-9]+:[0-9]+}} = combine(#0,r{{[0-9]+}})
-; CHECK: r{{[0-9]+:[0-9]+}} = add(r{{[0-9]+:[0-9]+}},r{{[0-9]+:[0-9]+}})
-; CHECK: r{{[0-9]+}} = mux(p{{[0-9]+}},r{{[0-9]+}},r{{[0-9]+}})
-; CHECK: r{{[0-9]+}} = mux(p{{[0-9]+}},r{{[0-9]+}},r{{[0-9]+}})
-; CHECK: r{{[0-9]+:[0-9]+}} = combine(r{{[0-9]+}},r{{[0-9]+}})
-; CHECK: r{{[0-9]+:[0-9]+}} = add(r{{[0-9]+:[0-9]+}},r{{[0-9]+:[0-9]+}})
+; CHECK-DAG: r{{[0-9]+:[0-9]+}} = add(r{{[0-9]+:[0-9]+}},r{{[0-9]+:[0-9]+}})
+; CHECK-DAG: r{{[0-9]+:[0-9]+}} = add(r{{[0-9]+:[0-9]+}},r{{[0-9]+:[0-9]+}})
+; CHECK-DAG: p{{[0-9]+}} = cmp.gtu(r{{[0-9]+:[0-9]+}},r{{[0-9]+:[0-9]+}})
+; CHECK-DAG: p{{[0-9]+}} = cmp.gtu(r{{[0-9]+:[0-9]+}},r{{[0-9]+:[0-9]+}})
+; CHECK-DAG: r{{[0-9]+}} = mux(p{{[0-9]+}},r{{[0-9]+}},r{{[0-9]+}})
+; CHECK-DAG: r{{[0-9]+}} = mux(p{{[0-9]+}},r{{[0-9]+}},r{{[0-9]+}})
 
-
-define void @check_adde_addc (i64 %AL, i64 %AH, i64 %BL, i64 %BH, i64* %RL, i64* %RH) {
-entry:
-        %tmp1 = zext i64 %AL to i128
-        %tmp23 = zext i64 %AH to i128
-        %tmp4 = shl i128 %tmp23, 64
-        %tmp5 = or i128 %tmp4, %tmp1
-        %tmp67 = zext i64 %BL to i128
-        %tmp89 = zext i64 %BH to i128
-        %tmp11 = shl i128 %tmp89, 64
-        %tmp12 = or i128 %tmp11, %tmp67
-        %tmp15 = add i128 %tmp12, %tmp5
-        %tmp1617 = trunc i128 %tmp15 to i64
-        store i64 %tmp1617, i64* %RL
-        %tmp21 = lshr i128 %tmp15, 64
-        %tmp2122 = trunc i128 %tmp21 to i64
-        store i64 %tmp2122, i64* %RH
-        ret void
+define void @check_adde_addc(i64 %a0, i64 %a1, i64 %a2, i64 %a3, i64* %a4, i64* %a5) {
+b6:
+  %v7 = zext i64 %a0 to i128
+  %v8 = zext i64 %a1 to i128
+  %v9 = shl i128 %v8, 64
+  %v10 = or i128 %v7, %v9
+  %v11 = zext i64 %a2 to i128
+  %v12 = zext i64 %a3 to i128
+  %v13 = shl i128 %v12, 64
+  %v14 = or i128 %v11, %v13
+  %v15 = add i128 %v10, %v14
+  %v16 = lshr i128 %v15, 64
+  %v17 = trunc i128 %v15 to i64
+  %v18 = trunc i128 %v16 to i64
+  store i64 %v17, i64* %a4
+  store i64 %v18, i64* %a5
+  ret void
 }
diff --git a/test/CodeGen/Hexagon/isel-op-zext-i1.ll b/test/CodeGen/Hexagon/isel-op-zext-i1.ll
new file mode 100644
index 00000000000..d77d0929e21
--- /dev/null
+++ b/test/CodeGen/Hexagon/isel-op-zext-i1.ll
@@ -0,0 +1,13 @@
+; RUN: llc -march=hexagon -hexagon-expand-condsets=0 < %s | FileCheck %s
+
+; In the IR, the i1 value is zero-extended first, then passed to add.
+; Check that in the final code, the mux happens after the add.
+; CHECK: [[REG1:r[0-9]+]] = add([[REG0:r[0-9]+]],#1)
+; CHECK: r{{[0-9]+}} = mux(p{{[0-3]}},[[REG1]],[[REG0]])
+
+define i32 @foo(i32 %a, i32 %b) {
+  %v0 = icmp eq i32 %a, %b
+  %v1 = zext i1 %v0 to i32
+  %v2 = add i32 %v1, %a
+  ret i32 %v2
+}
diff --git a/test/CodeGen/Hexagon/sube.ll b/test/CodeGen/Hexagon/sube.ll
index bd0da980cf7..2b09a998eff 100644
--- a/test/CodeGen/Hexagon/sube.ll
+++ b/test/CodeGen/Hexagon/sube.ll
@@ -1,27 +1,26 @@
-; RUN: llc -march=hexagon -disable-hsdr -hexagon-expand-condsets=0 -hexagon-bit=0 -disable-post-ra < %s | FileCheck %s
+; RUN: llc -march=hexagon -hexagon-expand-condsets=0 < %s | FileCheck %s
 
-; CHECK: p{{[0-9]+}} = cmp.gtu(r{{[0-9]+:[0-9]+}},r{{[0-9]+:[0-9]+}})
-; CHECK: r{{[0-9]+:[0-9]+}} = sub(r{{[0-9]+:[0-9]+}},r{{[0-9]+:[0-9]+}})
-; CHECK: r{{[0-9]+}} = mux(p{{[0-9]+}},#1,#0
-; CHECK: r{{[0-9]+:[0-9]+}} = sub(r{{[0-9]+:[0-9]+}},r{{[0-9]+:[0-9]+}})
-; CHECK: r{{[0-9]+:[0-9]+}} = combine(#0,r{{[0-9]+}})
-; CHECK: r{{[0-9]+:[0-9]+}} = sub(r{{[0-9]+:[0-9]+}},r{{[0-9]+:[0-9]+}})
+; CHECK-DAG: r{{[0-9]+:[0-9]+}} = sub(r{{[0-9]+:[0-9]+}},r{{[0-9]+:[0-9]+}})
+; CHECK-DAG: r{{[0-9]+:[0-9]+}} = sub(r{{[0-9]+:[0-9]+}},r{{[0-9]+:[0-9]+}})
+; CHECK-DAG: p{{[0-9]+}} = cmp.gtu(r{{[0-9]+:[0-9]+}},r{{[0-9]+:[0-9]+}})
+; CHECK-DAG: r{{[0-9]+}} = mux(p{{[0-9]+}},r{{[0-9]+}},r{{[0-9]+}})
+; CHECK-DAG: r{{[0-9]+}} = mux(p{{[0-9]+}},r{{[0-9]+}},r{{[0-9]+}})
 
-define void @check_sube_subc(i64 %AL, i64 %AH, i64 %BL, i64 %BH, i64* %RL, i64* %RH) {
-entry:
-        %tmp1 = zext i64 %AL to i128
-        %tmp23 = zext i64 %AH to i128
-        %tmp4 = shl i128 %tmp23, 64
-        %tmp5 = or i128 %tmp4, %tmp1
-        %tmp67 = zext i64 %BL to i128
-        %tmp89 = zext i64 %BH to i128
-        %tmp11 = shl i128 %tmp89, 64
-        %tmp12 = or i128 %tmp11, %tmp67
-        %tmp15 = sub i128 %tmp5, %tmp12
-        %tmp1617 = trunc i128 %tmp15 to i64
-        store i64 %tmp1617, i64* %RL
-        %tmp21 = lshr i128 %tmp15, 64
-        %tmp2122 = trunc i128 %tmp21 to i64
-        store i64 %tmp2122, i64* %RH
-        ret void
+define void @check_sube_subc(i64 %a0, i64 %a1, i64 %a2, i64 %a3, i64* %a4, i64* %a5) {
+b6:
+  %v7 = zext i64 %a0 to i128
+  %v8 = zext i64 %a1 to i128
+  %v9 = shl i128 %v8, 64
+  %v10 = or i128 %v7, %v9
+  %v11 = zext i64 %a2 to i128
+  %v12 = zext i64 %a3 to i128
+  %v13 = shl i128 %v12, 64
+  %v14 = or i128 %v11, %v13
+  %v15 = sub i128 %v10, %v14
+  %v16 = lshr i128 %v15, 64
+  %v17 = trunc i128 %v15 to i64
+  %v18 = trunc i128 %v16 to i64
+  store i64 %v17, i64* %a4
+  store i64 %v18, i64* %a5
+  ret void
 }