From: Lei Huang Date: Tue, 12 Sep 2017 18:39:11 +0000 (+0000) Subject: Update branch coalescing to be a PowerPC specific pass X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=c64508a42f8c84b25b40769016cddce4548abdab;p=llvm Update branch coalescing to be a PowerPC specific pass Implementing this pass as a PowerPC specific pass. Branch coalescing utilizes the analyzeBranch method which currently does not include any implicit operands. This is not an issue on PPC but must be handled on other targets. Pass is currently off by default. Enabled via -enable-ppc-branch-coalesce. Differential Revision: https://reviews.llvm.org/D32776 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@313061 91177308-0d34-0410-b5e6-96231b3b80d8 --- diff --git a/include/llvm/CodeGen/Passes.h b/include/llvm/CodeGen/Passes.h index 96cfce5b84d..1377a6dd6aa 100644 --- a/include/llvm/CodeGen/Passes.h +++ b/include/llvm/CodeGen/Passes.h @@ -409,9 +409,6 @@ namespace llvm { /// This pass frees the memory occupied by the MachineFunction. FunctionPass *createFreeMachineFunctionPass(); - /// This pass combine basic blocks guarded by the same branch. - extern char &BranchCoalescingID; - /// This pass performs outlining on machine instructions directly before /// printing assembly. 
ModulePass *createMachineOutlinerPass(); diff --git a/include/llvm/InitializePasses.h b/include/llvm/InitializePasses.h index 8ee6a053907..bf54b6471f4 100644 --- a/include/llvm/InitializePasses.h +++ b/include/llvm/InitializePasses.h @@ -76,7 +76,6 @@ void initializeBasicAAWrapperPassPass(PassRegistry&); void initializeBlockExtractorPassPass(PassRegistry&); void initializeBlockFrequencyInfoWrapperPassPass(PassRegistry&); void initializeBoundsCheckingPass(PassRegistry&); -void initializeBranchCoalescingPass(PassRegistry&); void initializeBranchFolderPassPass(PassRegistry&); void initializeBranchProbabilityInfoWrapperPassPass(PassRegistry&); void initializeBranchRelaxationPass(PassRegistry&); diff --git a/lib/CodeGen/CMakeLists.txt b/lib/CodeGen/CMakeLists.txt index 7f3c6da9126..7ec7fda4e44 100644 --- a/lib/CodeGen/CMakeLists.txt +++ b/lib/CodeGen/CMakeLists.txt @@ -4,7 +4,6 @@ add_llvm_library(LLVMCodeGen Analysis.cpp AtomicExpandPass.cpp BasicTargetTransformInfo.cpp - BranchCoalescing.cpp BranchFolding.cpp BranchRelaxation.cpp BuiltinGCs.cpp diff --git a/lib/CodeGen/CodeGen.cpp b/lib/CodeGen/CodeGen.cpp index 75f62ba4719..f4ccb4889d3 100644 --- a/lib/CodeGen/CodeGen.cpp +++ b/lib/CodeGen/CodeGen.cpp @@ -21,7 +21,6 @@ using namespace llvm; /// initializeCodeGen - Initialize all passes linked into the CodeGen library. 
void llvm::initializeCodeGen(PassRegistry &Registry) { initializeAtomicExpandPass(Registry); - initializeBranchCoalescingPass(Registry); initializeBranchFolderPassPass(Registry); initializeBranchRelaxationPass(Registry); initializeCodeGenPreparePass(Registry); diff --git a/lib/CodeGen/TargetPassConfig.cpp b/lib/CodeGen/TargetPassConfig.cpp index 329768769f1..4584f65619c 100644 --- a/lib/CodeGen/TargetPassConfig.cpp +++ b/lib/CodeGen/TargetPassConfig.cpp @@ -927,9 +927,6 @@ void TargetPassConfig::addMachineSSAOptimization() { addPass(&MachineLICMID, false); addPass(&MachineCSEID, false); - // Coalesce basic blocks with the same branch condition - addPass(&BranchCoalescingID); - addPass(&MachineSinkingID); addPass(&PeepholeOptimizerID); diff --git a/lib/Target/PowerPC/CMakeLists.txt b/lib/Target/PowerPC/CMakeLists.txt index 7ca4c199900..4aa6dfab525 100644 --- a/lib/Target/PowerPC/CMakeLists.txt +++ b/lib/Target/PowerPC/CMakeLists.txt @@ -16,6 +16,7 @@ add_llvm_target(PowerPCCodeGen PPCBoolRetToInt.cpp PPCAsmPrinter.cpp PPCBranchSelector.cpp + PPCBranchCoalescing.cpp PPCCCState.cpp PPCCTRLoops.cpp PPCHazardRecognizers.cpp diff --git a/lib/Target/PowerPC/PPC.h b/lib/Target/PowerPC/PPC.h index ad92ac8ce12..40790011f9b 100644 --- a/lib/Target/PowerPC/PPC.h +++ b/lib/Target/PowerPC/PPC.h @@ -41,6 +41,7 @@ namespace llvm { FunctionPass *createPPCVSXSwapRemovalPass(); FunctionPass *createPPCMIPeepholePass(); FunctionPass *createPPCBranchSelectionPass(); + FunctionPass *createPPCBranchCoalescingPass(); FunctionPass *createPPCQPXLoadSplatPass(); FunctionPass *createPPCISelDag(PPCTargetMachine &TM, CodeGenOpt::Level OL); FunctionPass *createPPCTLSDynamicCallPass(); diff --git a/lib/CodeGen/BranchCoalescing.cpp b/lib/Target/PowerPC/PPCBranchCoalescing.cpp similarity index 88% rename from lib/CodeGen/BranchCoalescing.cpp rename to lib/Target/PowerPC/PPCBranchCoalescing.cpp index 2c41b597843..33085a42361 100644 --- a/lib/CodeGen/BranchCoalescing.cpp +++ 
b/lib/Target/PowerPC/PPCBranchCoalescing.cpp @@ -13,6 +13,7 @@ /// //===----------------------------------------------------------------------===// +#include "PPC.h" #include "llvm/ADT/BitVector.h" #include "llvm/ADT/Statistic.h" #include "llvm/CodeGen/MachineDominators.h" @@ -27,18 +28,18 @@ using namespace llvm; -#define DEBUG_TYPE "branch-coalescing" - -static cl::opt - EnableBranchCoalescing("enable-branch-coalesce", cl::Hidden, - cl::desc("enable coalescing of duplicate branches")); +#define DEBUG_TYPE "ppc-branch-coalescing" STATISTIC(NumBlocksCoalesced, "Number of blocks coalesced"); STATISTIC(NumPHINotMoved, "Number of PHI Nodes that cannot be merged"); STATISTIC(NumBlocksNotCoalesced, "Number of blocks not coalesced"); +namespace llvm { + void initializePPCBranchCoalescingPass(PassRegistry&); +} + //===----------------------------------------------------------------------===// -// BranchCoalescing +// PPCBranchCoalescing //===----------------------------------------------------------------------===// /// /// Improve scheduling by coalescing branches that depend on the same condition. @@ -46,13 +47,17 @@ STATISTIC(NumBlocksNotCoalesced, "Number of blocks not coalesced"); /// and attempts to merge the blocks together. Such opportunities arise from /// the expansion of select statements in the IR. /// -/// For example, consider the following LLVM IR: +/// This pass does not handle implicit operands on branch statements. In order +/// to run on targets that use implicit operands, changes need to be made in the +/// canCoalesceBranch and canMerge methods. 
/// -/// %test = icmp eq i32 %x 0 -/// %tmp1 = select i1 %test, double %a, double 2.000000e-03 -/// %tmp2 = select i1 %test, double %b, double 5.000000e-03 +/// Example: the following LLVM IR /// -/// This IR expands to the following machine code on PowerPC: +/// %test = icmp eq i32 %x 0 +/// %tmp1 = select i1 %test, double %a, double 2.000000e-03 +/// %tmp2 = select i1 %test, double %b, double 5.000000e-03 +/// +/// expands to the following machine code: /// /// BB#0: derived from LLVM BB %entry /// Live Ins: %F1 %F3 %X6 @@ -132,7 +137,7 @@ STATISTIC(NumBlocksNotCoalesced, "Number of blocks not coalesced"); namespace { -class BranchCoalescing : public MachineFunctionPass { +class PPCBranchCoalescing : public MachineFunctionPass { struct CoalescingCandidateInfo { MachineBasicBlock *BranchBlock; // Block containing the branch MachineBasicBlock *BranchTargetBlock; // Block branched to @@ -157,15 +162,11 @@ class BranchCoalescing : public MachineFunctionPass { bool validateCandidates(CoalescingCandidateInfo &SourceRegion, CoalescingCandidateInfo &TargetRegion) const; - static bool isBranchCoalescingEnabled() { - return EnableBranchCoalescing == cl::BOU_TRUE; - } - public: static char ID; - BranchCoalescing() : MachineFunctionPass(ID) { - initializeBranchCoalescingPass(*PassRegistry::getPassRegistry()); + PPCBranchCoalescing() : MachineFunctionPass(ID) { + initializePPCBranchCoalescingPass(*PassRegistry::getPassRegistry()); } void getAnalysisUsage(AnalysisUsage &AU) const override { @@ -190,21 +191,25 @@ public: }; } // End anonymous namespace. 
-char BranchCoalescing::ID = 0; -char &llvm::BranchCoalescingID = BranchCoalescing::ID; +char PPCBranchCoalescing::ID = 0; +/// createPPCBranchCoalescingPass - returns an instance of the Branch Coalescing +/// Pass +FunctionPass *llvm::createPPCBranchCoalescingPass() { + return new PPCBranchCoalescing(); +} -INITIALIZE_PASS_BEGIN(BranchCoalescing, DEBUG_TYPE, +INITIALIZE_PASS_BEGIN(PPCBranchCoalescing, DEBUG_TYPE, "Branch Coalescing", false, false) INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree) INITIALIZE_PASS_DEPENDENCY(MachinePostDominatorTree) -INITIALIZE_PASS_END(BranchCoalescing, DEBUG_TYPE, "Branch Coalescing", +INITIALIZE_PASS_END(PPCBranchCoalescing, DEBUG_TYPE, "Branch Coalescing", false, false) -BranchCoalescing::CoalescingCandidateInfo::CoalescingCandidateInfo() +PPCBranchCoalescing::CoalescingCandidateInfo::CoalescingCandidateInfo() : BranchBlock(nullptr), BranchTargetBlock(nullptr), FallThroughBlock(nullptr), MustMoveDown(false), MustMoveUp(false) {} -void BranchCoalescing::CoalescingCandidateInfo::clear() { +void PPCBranchCoalescing::CoalescingCandidateInfo::clear() { BranchBlock = nullptr; BranchTargetBlock = nullptr; FallThroughBlock = nullptr; @@ -213,7 +218,7 @@ void BranchCoalescing::CoalescingCandidateInfo::clear() { MustMoveUp = false; } -void BranchCoalescing::initialize(MachineFunction &MF) { +void PPCBranchCoalescing::initialize(MachineFunction &MF) { MDT = &getAnalysis(); MPDT = &getAnalysis(); TII = MF.getSubtarget().getInstrInfo(); @@ -230,7 +235,7 @@ void BranchCoalescing::initialize(MachineFunction &MF) { ///\param[in,out] Cand The coalescing candidate to analyze ///\return true if and only if the branch can be coalesced, false otherwise /// -bool BranchCoalescing::canCoalesceBranch(CoalescingCandidateInfo &Cand) { +bool PPCBranchCoalescing::canCoalesceBranch(CoalescingCandidateInfo &Cand) { DEBUG(dbgs() << "Determine if branch block " << Cand.BranchBlock->getNumber() << " can be coalesced:"); MachineBasicBlock *FalseMBB = nullptr; @@ 
-246,6 +251,19 @@ bool BranchCoalescing::canCoalesceBranch(CoalescingCandidateInfo &Cand) { if (!I.isBranch()) continue; + // The analyzeBranch method does not include any implicit operands. + // This is not an issue on PPC but must be handled on other targets. + // For this pass to be made target-independent, the analyzeBranch API + // would need to be updated to support implicit operands and there would + // need to be a way to verify that any implicit operands would not be + // clobbered by merging blocks. This would include identifying the + // implicit operands as well as the basic block they are defined in. + // This could be done by changing the analyzeBranch API to have it also + // record and return the implicit operands and the blocks where they are + // defined. Alternatively, the BranchCoalescing code would need to be + // extended to identify the implicit operands. The analysis in canMerge + // must then be extended to prove that none of the implicit operands are + // changed in the blocks that are combined during coalescing. if (I.getNumOperands() != I.getNumExplicitOperands()) { DEBUG(dbgs() << "Terminator contains implicit operands - skip : " << I << "\n"); @@ -309,7 +327,7 @@ bool BranchCoalescing::canCoalesceBranch(CoalescingCandidateInfo &Cand) { /// \param[in] OpList2 operand list /// \return true if and only if the operands lists are identical /// -bool BranchCoalescing::identicalOperands( +bool PPCBranchCoalescing::identicalOperands( ArrayRef OpList1, ArrayRef OpList2) const { if (OpList1.size() != OpList2.size()) { @@ -325,6 +343,14 @@ bool BranchCoalescing::identicalOperands( << "Op2: " << Op2 << "\n"); if (Op1.isIdenticalTo(Op2)) { + // filter out instructions with physical-register uses + if (Op1.isReg() && TargetRegisterInfo::isPhysicalRegister(Op1.getReg()) + // If the physical register is constant then we can assume the value + // has not changed between uses. 
+ && !(Op1.isUse() && MRI->isConstantPhysReg(Op1.getReg()))) { + DEBUG(dbgs() << "The operands are not provably identical.\n"); + return false; + } DEBUG(dbgs() << "Op1 and Op2 are identical!\n"); continue; } @@ -349,6 +375,7 @@ bool BranchCoalescing::identicalOperands( return false; } } + return true; } @@ -361,7 +388,7 @@ bool BranchCoalescing::identicalOperands( /// \param[in] SourceMBB block to move PHI instructions from /// \param[in] TargetMBB block to move PHI instructions to /// -void BranchCoalescing::moveAndUpdatePHIs(MachineBasicBlock *SourceMBB, +void PPCBranchCoalescing::moveAndUpdatePHIs(MachineBasicBlock *SourceMBB, MachineBasicBlock *TargetMBB) { MachineBasicBlock::iterator MI = SourceMBB->begin(); @@ -394,7 +421,7 @@ void BranchCoalescing::moveAndUpdatePHIs(MachineBasicBlock *SourceMBB, /// \return true if it is safe to move MI to beginning of TargetMBB, /// false otherwise. /// -bool BranchCoalescing::canMoveToBeginning(const MachineInstr &MI, +bool PPCBranchCoalescing::canMoveToBeginning(const MachineInstr &MI, const MachineBasicBlock &TargetMBB ) const { @@ -425,7 +452,7 @@ bool BranchCoalescing::canMoveToBeginning(const MachineInstr &MI, /// \return true if it is safe to move MI to end of TargetMBB, /// false otherwise. /// -bool BranchCoalescing::canMoveToEnd(const MachineInstr &MI, +bool PPCBranchCoalescing::canMoveToEnd(const MachineInstr &MI, const MachineBasicBlock &TargetMBB ) const { @@ -457,7 +484,7 @@ bool BranchCoalescing::canMoveToEnd(const MachineInstr &MI, /// \return true if all instructions in SourceRegion.BranchBlock can be merged /// into a block in TargetRegion; false otherwise. 
/// -bool BranchCoalescing::validateCandidates( +bool PPCBranchCoalescing::validateCandidates( CoalescingCandidateInfo &SourceRegion, CoalescingCandidateInfo &TargetRegion) const { @@ -500,7 +527,7 @@ bool BranchCoalescing::validateCandidates( /// \return true if all instructions in SourceRegion.BranchBlock can be merged /// into a block in TargetRegion, false otherwise. /// -bool BranchCoalescing::canMerge(CoalescingCandidateInfo &SourceRegion, +bool PPCBranchCoalescing::canMerge(CoalescingCandidateInfo &SourceRegion, CoalescingCandidateInfo &TargetRegion) const { if (!validateCandidates(SourceRegion, TargetRegion)) return false; @@ -605,7 +632,7 @@ bool BranchCoalescing::canMerge(CoalescingCandidateInfo &SourceRegion, /// \param[in] SourceRegion The candidate to move blocks from /// \param[in] TargetRegion The candidate to move blocks to /// -bool BranchCoalescing::mergeCandidates(CoalescingCandidateInfo &SourceRegion, +bool PPCBranchCoalescing::mergeCandidates(CoalescingCandidateInfo &SourceRegion, CoalescingCandidateInfo &TargetRegion) { if (SourceRegion.MustMoveUp && SourceRegion.MustMoveDown) { @@ -685,10 +712,9 @@ bool BranchCoalescing::mergeCandidates(CoalescingCandidateInfo &SourceRegion, return true; } -bool BranchCoalescing::runOnMachineFunction(MachineFunction &MF) { +bool PPCBranchCoalescing::runOnMachineFunction(MachineFunction &MF) { - if (skipFunction(*MF.getFunction()) || MF.empty() || - !isBranchCoalescingEnabled()) + if (skipFunction(*MF.getFunction()) || MF.empty()) return false; bool didSomething = false; diff --git a/lib/Target/PowerPC/PPCTargetMachine.cpp b/lib/Target/PowerPC/PPCTargetMachine.cpp index bc5d32b37fd..9e146786469 100644 --- a/lib/Target/PowerPC/PPCTargetMachine.cpp +++ b/lib/Target/PowerPC/PPCTargetMachine.cpp @@ -40,6 +40,10 @@ using namespace llvm; + +static cl::opt + EnableBranchCoalescing("enable-ppc-branch-coalesce", cl::Hidden, + cl::desc("enable coalescing of duplicate branches for PPC")); static cl:: opt 
DisableCTRLoops("disable-ppc-ctrloops", cl::Hidden, cl::desc("Disable CTR loops for PPC")); @@ -378,6 +382,10 @@ bool PPCPassConfig::addInstSelector() { } void PPCPassConfig::addMachineSSAOptimization() { + // PPCBranchCoalescingPass needs to be done before machine sinking + // since it merges empty blocks. + if (EnableBranchCoalescing && getOptLevel() != CodeGenOpt::None) + addPass(createPPCBranchCoalescingPass()); TargetPassConfig::addMachineSSAOptimization(); // For little endian, remove where possible the vector swap instructions // introduced at code generation to normalize vector element order. diff --git a/test/CodeGen/PowerPC/branch_coalesce.ll b/test/CodeGen/PowerPC/branch_coalesce.ll index deb6d898c2e..007eef27b2d 100644 --- a/test/CodeGen/PowerPC/branch_coalesce.ll +++ b/test/CodeGen/PowerPC/branch_coalesce.ll @@ -1,26 +1,19 @@ -; RUN: llc -mcpu=pwr8 -mtriple=powerpc64le-unknown-linux-gnu -verify-machineinstrs -enable-branch-coalesce=true < %s | FileCheck %s -; RUN: llc -mcpu=pwr8 -mtriple=powerpc64-unknown-linux-gnu -verify-machineinstrs -enable-branch-coalesce=true < %s | FileCheck %s +; RUN: llc -mtriple=powerpc64le-unknown-linux-gnu -verify-machineinstrs -enable-ppc-branch-coalesce < %s | FileCheck %s +; RUN: llc -mcpu=pwr8 -mtriple=powerpc64-unknown-linux-gnu -verify-machineinstrs -enable-ppc-branch-coalesce < %s | FileCheck %s +; RUN: llc -mtriple=powerpc64le-unknown-linux-gnu -verify-machineinstrs < %s | FileCheck --check-prefix=CHECK-NOCOALESCE %s +; RUN: llc -mcpu=pwr8 -mtriple=powerpc64-unknown-linux-gnu -verify-machineinstrs < %s | FileCheck --check-prefix=CHECK-NOCOALESCE %s ; Function Attrs: nounwind define double @testBranchCoal(double %a, double %b, double %c, i32 %x) { -entry: - %test = icmp eq i32 %x, 0 - %tmp1 = select i1 %test, double %a, double 2.000000e-03 - %tmp2 = select i1 %test, double %b, double 0.000000e+00 - %tmp3 = select i1 %test, double %c, double 5.000000e-03 - %res1 = fadd double %tmp1, %tmp2 - %result = fadd double %res1, 
%tmp3 - ret double %result - -; CHECK-LABEL: @testBranchCoal +; CHECK-LABEL: @testBranchCoal ; CHECK: cmplwi [[CMPR:[0-7]+]], 6, 0 ; CHECK: beq [[CMPR]], .LBB[[LAB1:[0-9_]+]] ; CHECK-DAG: addis [[LD1REG:[0-9]+]], 2, .LCPI0_0@toc@ha ; CHECK-DAG: addis [[LD2REG:[0-9]+]], 2, .LCPI0_1@toc@ha ; CHECK-DAG: xxlxor 2, 2, 2 -; CHECK-NOT: beq -; CHECK-DAG: addi [[LD1BASE:[0-9]+]], [[LD1REG]] +; CHECK-NOT: beq +; CHECK-DAG: addi [[LD1BASE:[0-9]+]], [[LD1REG]] ; CHECK-DAG: addi [[LD2BASE:[0-9]+]], [[LD2REG]] ; CHECK-DAG: lxsdx 1, 0, [[LD1BASE]] ; CHECK-DAG: lxsdx 3, 0, [[LD2BASE]] @@ -28,4 +21,40 @@ entry: ; CHECK: xsadddp 0, 1, 2 ; CHECK: xsadddp 1, 0, 3 ; CHECK: blr + +; CHECK-NOCOALESCE-LABEL: testBranchCoal: +; CHECK-NOCOALESCE: # BB#0: # %entry +; CHECK-NOCOALESCE-NEXT: cmplwi 0, 6, 0 +; CHECK-NOCOALESCE-NEXT: bne 0, .LBB0_5 +; CHECK-NOCOALESCE-NEXT: # BB#1: # %entry +; CHECK-NOCOALESCE-NEXT: bne 0, .LBB0_6 +; CHECK-NOCOALESCE-NEXT: .LBB0_2: # %entry +; CHECK-NOCOALESCE-NEXT: beq 0, .LBB0_4 +; CHECK-NOCOALESCE-NEXT: .LBB0_3: # %entry +; CHECK-NOCOALESCE-NEXT: addis 3, 2, .LCPI0_1@toc@ha +; CHECK-NOCOALESCE-NEXT: addi 3, 3, .LCPI0_1@toc@l +; CHECK-NOCOALESCE-NEXT: lxsdx 3, 0, 3 +; CHECK-NOCOALESCE-NEXT: .LBB0_4: # %entry +; CHECK-NOCOALESCE-NEXT: xsadddp 0, 1, 2 +; CHECK-NOCOALESCE-NEXT: xsadddp 1, 0, 3 +; CHECK-NOCOALESCE-NEXT: blr +; CHECK-NOCOALESCE-NEXT: .LBB0_5: # %entry +; CHECK-NOCOALESCE-NEXT: addis 3, 2, .LCPI0_0@toc@ha +; CHECK-NOCOALESCE-NEXT: addi 3, 3, .LCPI0_0@toc@l +; CHECK-NOCOALESCE-NEXT: lxsdx 1, 0, 3 +; CHECK-NOCOALESCE-NEXT: beq 0, .LBB0_2 +; CHECK-NOCOALESCE-NEXT: .LBB0_6: # %entry +; CHECK-NOCOALESCE-NEXT: xxlxor 2, 2, 2 +; CHECK-NOCOALESCE-NEXT: bne 0, .LBB0_3 +; CHECK-NOCOALESCE-NEXT: b .LBB0_4 + entry: + + %test = icmp eq i32 %x, 0 + %tmp1 = select i1 %test, double %a, double 2.000000e-03 + %tmp2 = select i1 %test, double %b, double 0.000000e+00 + %tmp3 = select i1 %test, double %c, double 5.000000e-03 + + %res1 = fadd double %tmp1, %tmp2 + %result 
= fadd double %res1, %tmp3 + ret double %result }