[Hexagon] Early-if-convert branches that may exit the loop

author Krzysztof Parzyszek <kparzysz@codeaurora.org>

Mon, 6 Mar 2017 17:24:04 +0000 (17:24 +0000)

committer Krzysztof Parzyszek <kparzysz@codeaurora.org>

Mon, 6 Mar 2017 17:24:04 +0000 (17:24 +0000)
author Krzysztof Parzyszek <kparzysz@codeaurora.org>
Mon, 6 Mar 2017 17:24:04 +0000 (17:24 +0000)
committer Krzysztof Parzyszek <kparzysz@codeaurora.org>
Mon, 6 Mar 2017 17:24:04 +0000 (17:24 +0000)
diff --git a/lib/Target/Hexagon/HexagonEarlyIfConv.cpp b/lib/Target/Hexagon/HexagonEarlyIfConv.cpp

index c1891674721265f3fd27d0e9eb37f5bb34278dac..4418b57f6a060a9829d71d2f77c3ce0da140710f 100644 (file)
--- a/lib/Target/Hexagon/HexagonEarlyIfConv.cpp
+++ b/lib/Target/Hexagon/HexagonEarlyIfConv.cpp
@@ -105,6 +105,8 @@ namespace {
      cl::init(false), cl::desc("Enable branch probability info"));
    cl::opt<unsigned> SizeLimit("eif-limit", cl::init(6), cl::Hidden,
      cl::desc("Size limit in Hexagon early if-conversion"));
+  cl::opt<bool> SkipExitBranches("eif-no-loop-exit", cl::init(false),
+    cl::Hidden, cl::desc("Do not convert branches that may exit the loop"));
  
    struct PrintMB {
      PrintMB(const MachineBasicBlock *B) : MB(B) {}
@@ -142,8 +144,8 @@ namespace {
    raw_ostream &operator<<(raw_ostream &OS, const PrintFP &P) {
      OS << "{ SplitB:" << PrintMB(P.FP.SplitB)
         << ", PredR:" << PrintReg(P.FP.PredR, &P.TRI)
-       << ", TrueB:" << PrintMB(P.FP.TrueB) << ", FalseB:"
-       << PrintMB(P.FP.FalseB)
+       << ", TrueB:" << PrintMB(P.FP.TrueB)
+       << ", FalseB:" << PrintMB(P.FP.FalseB)
         << ", JoinB:" << PrintMB(P.FP.JoinB) << " }";
      return OS;
    }
@@ -187,7 +189,8 @@ namespace {
      bool usesUndefVReg(const MachineInstr *MI) const;
      bool isValid(const FlowPattern &FP) const;
      unsigned countPredicateDefs(const MachineBasicBlock *B) const;
-    unsigned computePhiCost(MachineBasicBlock *B) const;
+    unsigned computePhiCost(const MachineBasicBlock *B,
+          const FlowPattern &FP) const;
      bool isProfitable(const FlowPattern &FP) const;
      bool isPredicableStore(const MachineInstr *MI) const;
      bool isSafeToSpeculate(const MachineInstr *MI) const;
@@ -199,6 +202,9 @@ namespace {
            MachineBasicBlock::iterator At, MachineBasicBlock *FromB,
            unsigned PredR, bool IfTrue);
  
+    unsigned buildMux(MachineBasicBlock *B, MachineBasicBlock::iterator At,
+          const TargetRegisterClass *DRC, unsigned PredR, unsigned TR,
+          unsigned TSR, unsigned FR, unsigned FSR);
      void updatePhiNodes(MachineBasicBlock *WhereB, const FlowPattern &FP);
      void convert(const FlowPattern &FP);
  
@@ -230,7 +236,7 @@ bool HexagonEarlyIfConversion::isPreheader(const MachineBasicBlock *B) const {
      return false;
    MachineBasicBlock *SB = *B->succ_begin();
    MachineLoop *L = MLI->getLoopFor(SB);
-  return L && SB == L->getHeader();
+  return L && SB == L->getHeader() && MDT->dominates(B, SB);
  }
  
  bool HexagonEarlyIfConversion::matchFlowPattern(MachineBasicBlock *B,
@@ -264,9 +270,6 @@ bool HexagonEarlyIfConversion::matchFlowPattern(MachineBasicBlock *B,
      // mark as diamond with both sides equal?
      return false;
    }
-  // Loop could be null for both.
-  if (MLI->getLoopFor(T1B) != L || MLI->getLoopFor(T2B) != L)
-    return false;
  
    // Record the true/false blocks in such a way that "true" means "if (PredR)",
    // and "false" means "if (!PredR)".
@@ -289,8 +292,14 @@ bool HexagonEarlyIfConversion::matchFlowPattern(MachineBasicBlock *B,
    // it has a single successor. In fact, the block has to end either with
    // an unconditional branch (which can be predicated), or with a fall-
    // through.
-  bool TOk = (TNP == 1) && (TNS == 1);
-  bool FOk = (FNP == 1) && (FNS == 1);
+  // Also, skip blocks that do not belong to the same loop.
+  bool TOk = (TNP == 1 && TNS == 1 && MLI->getLoopFor(TB) == L);
+  bool FOk = (FNP == 1 && FNS == 1 && MLI->getLoopFor(FB) == L);
+
+  // If requested (via an option), do not consider branches where the
+  // true and false targets do not belong to the same loop.
+  if (SkipExitBranches && MLI->getLoopFor(TB) != MLI->getLoopFor(FB))
+    return false;
  
    // If neither is predicable, there is nothing interesting.
    if (!TOk && !FOk)
@@ -307,17 +316,15 @@ bool HexagonEarlyIfConversion::matchFlowPattern(MachineBasicBlock *B,
        // Diamond: "if (P) then TB; else FB;".
      } else {
        // TOk && !FOk
-      if (TSB == FB) {
+      if (TSB == FB)
          JB = FB;
-        FB = nullptr;
-      }
+      FB = nullptr;
      }
    } else {
      // !TOk && FOk  (at least one must be true by now).
-    if (FSB == TB) {
+    if (FSB == TB)
        JB = TB;
-      TB = nullptr;
-    }
+    TB = nullptr;
    }
    // Don't try to predicate loop preheaders.
    if ((TB && isPreheader(TB)) || (FB && isPreheader(FB))) {
@@ -448,24 +455,39 @@ bool HexagonEarlyIfConversion::isValid(const FlowPattern &FP) const {
    return true;
  }
  
-unsigned HexagonEarlyIfConversion::computePhiCost(MachineBasicBlock *B) const {
-  assert(B->pred_size() <= 2);
+unsigned HexagonEarlyIfConversion::computePhiCost(const MachineBasicBlock *B,
+      const FlowPattern &FP) const {
    if (B->pred_size() < 2)
      return 0;
  
    unsigned Cost = 0;
-  MachineBasicBlock::const_iterator I, E = B->getFirstNonPHI();
-  for (I = B->begin(); I != E; ++I) {
-    const MachineOperand &RO1 = I->getOperand(1);
-    const MachineOperand &RO3 = I->getOperand(3);
-    assert(RO1.isReg() && RO3.isReg());
+  for (const MachineInstr &MI : *B) {
+    if (!MI.isPHI())
+      break;
+    // If both incoming blocks are one of the TrueB/FalseB/SplitB, then
+    // a MUX may be needed. Otherwise the PHI will need to be updated at
+    // no extra cost.
+    // Find the interesting PHI operands for further checks.
+    SmallVector<unsigned,2> Inc;
+    for (unsigned i = 1, e = MI.getNumOperands(); i != e; i += 2) {
+      const MachineBasicBlock *BB = MI.getOperand(i+1).getMBB();
+      if (BB == FP.SplitB || BB == FP.TrueB || BB == FP.FalseB)
+        Inc.push_back(i);
+    }
+    assert(Inc.size() <= 2);
+    if (Inc.size() < 2)
+      continue;
+
+    const MachineOperand &RA = MI.getOperand(Inc[0]); // 1st input recorded in Inc
+    const MachineOperand &RB = MI.getOperand(Inc[1]); // 2nd input recorded in Inc
+    assert(RA.isReg() && RB.isReg());
      // Must have a MUX if the phi uses a subregister.
-    if (RO1.getSubReg() != 0 || RO3.getSubReg() != 0) {
+    if (RA.getSubReg() != 0 || RB.getSubReg() != 0) {
        Cost++;
        continue;
      }
-    MachineInstr *Def1 = MRI->getVRegDef(RO1.getReg());
-    MachineInstr *Def3 = MRI->getVRegDef(RO3.getReg());
+    const MachineInstr *Def1 = MRI->getVRegDef(RA.getReg());
+    const MachineInstr *Def3 = MRI->getVRegDef(RB.getReg());
      if (!HII->isPredicable(*Def1) || !HII->isPredicable(*Def3))
        Cost++;
    }
@@ -491,7 +513,6 @@ unsigned HexagonEarlyIfConversion::countPredicateDefs(
  
  bool HexagonEarlyIfConversion::isProfitable(const FlowPattern &FP) const {
    if (FP.TrueB && FP.FalseB) {
-
      // Do not IfCovert if the branch is one sided.
      if (MBPI) {
        BranchProbability Prob(9, 10);
@@ -542,17 +563,17 @@ bool HexagonEarlyIfConversion::isProfitable(const FlowPattern &FP) const {
    unsigned TotalPh = 0;
    unsigned PredDefs = countPredicateDefs(FP.SplitB);
    if (FP.JoinB) {
-    TotalPh = computePhiCost(FP.JoinB);
+    TotalPh = computePhiCost(FP.JoinB, FP);
      PredDefs += countPredicateDefs(FP.JoinB);
    } else {
      if (FP.TrueB && FP.TrueB->succ_size() > 0) {
        MachineBasicBlock *SB = *FP.TrueB->succ_begin();
-      TotalPh += computePhiCost(SB);
+      TotalPh += computePhiCost(SB, FP);
        PredDefs += countPredicateDefs(SB);
      }
      if (FP.FalseB && FP.FalseB->succ_size() > 0) {
        MachineBasicBlock *SB = *FP.FalseB->succ_begin();
-      TotalPh += computePhiCost(SB);
+      TotalPh += computePhiCost(SB, FP);
        PredDefs += countPredicateDefs(SB);
      }
    }
@@ -739,6 +760,43 @@ void HexagonEarlyIfConversion::predicateBlockNB(MachineBasicBlock *ToB,
    }
  }
  
+unsigned HexagonEarlyIfConversion::buildMux(MachineBasicBlock *B,
+      MachineBasicBlock::iterator At, const TargetRegisterClass *DRC,
+      unsigned PredR, unsigned TR, unsigned TSR, unsigned FR, unsigned FSR) {
+  unsigned Opc = 0;  // opcode of the instruction built below; set by the switch
+  switch (DRC->getID()) {  // one opcode per supported register class ID
+    case Hexagon::IntRegsRegClassID:
+      Opc = Hexagon::C2_mux;
+      break;
+    case Hexagon::DoubleRegsRegClassID:
+      Opc = Hexagon::PS_pselect;
+      break;
+    case Hexagon::VectorRegsRegClassID:
+      Opc = Hexagon::PS_vselect;
+      break;
+    case Hexagon::VecDblRegsRegClassID:
+      Opc = Hexagon::PS_wselect;
+      break;
+    case Hexagon::VectorRegs128BRegClassID:
+      Opc = Hexagon::PS_vselect_128B;
+      break;
+    case Hexagon::VecDblRegs128BRegClassID:
+      Opc = Hexagon::PS_wselect_128B;
+      break;
+    default:
+      llvm_unreachable("unexpected register type");
+  }
+  const MCInstrDesc &D = HII->get(Opc);  // descriptor for the chosen opcode
+
+  DebugLoc DL = B->findBranchDebugLoc();  // location as reported by block B
+  unsigned MuxR = MRI->createVirtualRegister(DRC);  // fresh result reg of class DRC
+  BuildMI(*B, At, DL, D, MuxR)  // new instruction in B at At, defining MuxR
+    .addReg(PredR)  // first source operand: PredR
+    .addReg(TR, 0, TSR)  // second source operand: TR with subregister TSR
+    .addReg(FR, 0, FSR);  // third source operand: FR with subregister FSR
+  return MuxR;  // caller gets the newly created register
+}
+
  void HexagonEarlyIfConversion::updatePhiNodes(MachineBasicBlock *WhereB,
        const FlowPattern &FP) {
    // Visit all PHI nodes in the WhereB block and generate MUX instructions
@@ -765,40 +823,25 @@ void HexagonEarlyIfConversion::updatePhiNodes(MachineBasicBlock *WhereB,
        TR = SR, TSR = SSR;
      else if (FR == 0)
        FR = SR, FSR = SSR;
-    assert(TR && FR);
-
-    using namespace Hexagon;
-
-    unsigned DR = PN->getOperand(0).getReg();
-    const TargetRegisterClass *RC = MRI->getRegClass(DR);
-    unsigned Opc = 0;
-    if (RC == &IntRegsRegClass)
-      Opc = C2_mux;
-    else if (RC == &DoubleRegsRegClass)
-      Opc = PS_pselect;
-    else if (RC == &VectorRegsRegClass)
-      Opc = PS_vselect;
-    else if (RC == &VecDblRegsRegClass)
-      Opc = PS_wselect;
-    else if (RC == &VectorRegs128BRegClass)
-      Opc = PS_vselect_128B;
-    else if (RC == &VecDblRegs128BRegClass)
-      Opc = PS_wselect_128B;
-    else
-      llvm_unreachable("unexpected register type");
-    const MCInstrDesc &D = HII->get(Opc);
-
-    MachineBasicBlock::iterator MuxAt = FP.SplitB->getFirstTerminator();
-    DebugLoc DL;
-    if (MuxAt != FP.SplitB->end())
-      DL = MuxAt->getDebugLoc();
-    unsigned MuxR = MRI->createVirtualRegister(RC);
-    BuildMI(*FP.SplitB, MuxAt, DL, D, MuxR)
-      .addReg(FP.PredR)
-      .addReg(TR, 0, TSR)
-      .addReg(FR, 0, FSR);
-
-    PN->addOperand(MachineOperand::CreateReg(MuxR, false));
+
+    assert(TR || FR);
+    unsigned MuxR = 0, MuxSR = 0;
+
+    if (TR && FR) {
+      unsigned DR = PN->getOperand(0).getReg();
+      const TargetRegisterClass *RC = MRI->getRegClass(DR);
+      MuxR = buildMux(FP.SplitB, FP.SplitB->getFirstTerminator(), RC,
+                      FP.PredR, TR, TSR, FR, FSR);
+    } else if (TR) {
+      MuxR = TR;
+      MuxSR = TSR;
+    } else {
+      MuxR = FR;
+      MuxSR = FSR;
+    }
+
+    PN->addOperand(MachineOperand::CreateReg(MuxR, false, false, false, false,
+                                             false, false, MuxSR));
      PN->addOperand(MachineOperand::CreateMBB(FP.SplitB));
    }
  }
diff --git a/test/CodeGen/Hexagon/block-addr.ll b/test/CodeGen/Hexagon/block-addr.ll

index c0db2cef545e5db6b675a21337420cbed3bbf328..5af3a69f8aab196b5a06ff4cb1865d878097f7de 100644 (file)
--- a/test/CodeGen/Hexagon/block-addr.ll
+++ b/test/CodeGen/Hexagon/block-addr.ll
@@ -2,7 +2,7 @@
  
  ; CHECK: .LJTI
  ; CHECK-DAG: r[[REG:[0-9]+]] = memw(r{{[0-9]+}}{{ *}}+{{ *}}r{{[0-9]+<<#[0-9]+}})
-; CHECK-DAG: jumpr:nt r[[REG]]
+; CHECK-DAG: jumpr r[[REG]]
  
  define void @main() #0 {
  entry:
diff --git a/test/CodeGen/Hexagon/early-if-merge-loop.ll b/test/CodeGen/Hexagon/early-if-merge-loop.ll

new file mode 100644 (file)

index 0000000..f45058f
--- /dev/null
+++ b/test/CodeGen/Hexagon/early-if-merge-loop.ll
@@ -0,0 +1,91 @@
+; RUN: llc -march=hexagon < %s | FileCheck %s
+; Make sure that the loop in the end has only one basic block.
+
+; CHECK-LABEL: fred
+; Rely on the comments, make sure the one for the loop header is present.
+; CHECK: %loop
+; CHECK-NOT: %should_merge
+
+target triple = "hexagon"
+
+define i32 @fred(i32 %a0, i64* nocapture readonly %a1) #0 {
+b2:
+  %v3 = bitcast i64* %a1 to i32*
+  %v4 = getelementptr inbounds i32, i32* %v3, i32 1
+  %v5 = zext i32 %a0 to i64
+  br label %loop
+
+loop:                                             ; preds = %should_merge, %b2
+  %v7 = phi i32 [ 0, %b2 ], [ %v49, %should_merge ]
+  %v8 = phi i32 [ 0, %b2 ], [ %v42, %should_merge ]
+  %v9 = phi i32* [ %v4, %b2 ], [ %v53, %should_merge ]
+  %v10 = phi i32 [ 0, %b2 ], [ %v30, %should_merge ]
+  %v11 = phi i32* [ %v3, %b2 ], [ %v51, %should_merge ]
+  %v12 = phi i32 [ 0, %b2 ], [ %v23, %should_merge ]
+  %v13 = phi i32 [ 2, %b2 ], [ %v54, %should_merge ]
+  %v14 = load i32, i32* %v11, align 4, !tbaa !0
+  %v15 = load i32, i32* %v9, align 4, !tbaa !0
+  %v16 = icmp ult i32 %v13, 30
+  %v17 = zext i32 %v12 to i64
+  %v18 = shl nuw i64 %v17, 32
+  %v19 = zext i32 %v14 to i64
+  %v20 = or i64 %v18, %v19
+  %v21 = tail call i64 @llvm.hexagon.A2.addp(i64 %v20, i64 %v5)
+  %v22 = lshr i64 %v21, 32
+  %v23 = trunc i64 %v22 to i32
+  %v24 = zext i32 %v10 to i64
+  %v25 = shl nuw i64 %v24, 32
+  %v26 = zext i32 %v15 to i64
+  %v27 = or i64 %v25, %v26
+  %v28 = tail call i64 @llvm.hexagon.A2.addp(i64 %v27, i64 %v5)
+  %v29 = lshr i64 %v28, 32
+  %v30 = trunc i64 %v29 to i32
+  %v31 = getelementptr inbounds i32, i32* %v3, i32 %v13
+  %v32 = load i32, i32* %v31, align 4, !tbaa !0
+  %v33 = or i32 %v13, 1
+  %v34 = getelementptr inbounds i32, i32* %v3, i32 %v33
+  %v35 = load i32, i32* %v34, align 4, !tbaa !0
+  %v36 = zext i32 %v8 to i64
+  %v37 = shl nuw i64 %v36, 32
+  %v38 = zext i32 %v32 to i64
+  %v39 = or i64 %v37, %v38
+  %v40 = tail call i64 @llvm.hexagon.A2.subp(i64 %v39, i64 %v5)
+  %v41 = lshr i64 %v40, 32
+  %v42 = trunc i64 %v41 to i32
+  %v43 = zext i32 %v7 to i64
+  %v44 = shl nuw i64 %v43, 32
+  %v45 = zext i32 %v35 to i64
+  %v46 = or i64 %v44, %v45
+  %v47 = tail call i64 @llvm.hexagon.A2.subp(i64 %v46, i64 %v5)
+  %v48 = lshr i64 %v47, 32
+  %v49 = trunc i64 %v48 to i32
+  br i1 %v16, label %should_merge, label %exit
+
+should_merge:                                     ; preds = %loop
+  %v50 = add nuw nsw i32 %v13, 2
+  %v51 = getelementptr inbounds i32, i32* %v3, i32 %v50
+  %v52 = add nuw nsw i32 %v13, 3
+  %v53 = getelementptr inbounds i32, i32* %v3, i32 %v52
+  %v54 = add nuw nsw i32 %v13, 4
+  br label %loop
+
+exit:                                             ; preds = %loop
+  %v57 = tail call i64 @llvm.hexagon.A2.combinew(i32 %v42, i32 %v23)
+  %v58 = tail call i64 @llvm.hexagon.A2.combinew(i32 %v49, i32 %v30)
+  %v59 = tail call i64 @llvm.hexagon.A2.addp(i64 %v57, i64 %v58)
+  %v60 = lshr i64 %v59, 32
+  %v61 = trunc i64 %v60 to i32
+  ret i32 %v61
+}
+
+declare i64 @llvm.hexagon.A2.addp(i64, i64) #1
+declare i64 @llvm.hexagon.A2.subp(i64, i64) #1
+declare i64 @llvm.hexagon.A2.combinew(i32, i32) #1
+
+attributes #0 = { nounwind readonly "target-cpu"="hexagonv60" "target-features"="-hvx,-hvx-double,-long-calls" }
+attributes #1 = { nounwind readnone }
+
+!0 = !{!1, !1, i64 0}
+!1 = !{!"long", !2, i64 0}
+!2 = !{!"omnipotent char", !3, i64 0}
+!3 = !{!"Simple C/C++ TBAA"}
author	Krzysztof Parzyszek <kparzysz@codeaurora.org>
	Mon, 6 Mar 2017 17:24:04 +0000 (17:24 +0000)
committer	Krzysztof Parzyszek <kparzysz@codeaurora.org>
	Mon, 6 Mar 2017 17:24:04 +0000 (17:24 +0000)
lib/Target/Hexagon/HexagonEarlyIfConv.cpp		patch \| blob \| history
test/CodeGen/Hexagon/block-addr.ll		patch \| blob \| history
test/CodeGen/Hexagon/early-if-merge-loop.ll	[new file with mode: 0644]	patch \| blob