From f3470b10da7a0e8fb47740af2cfca1a5acd86a8f Mon Sep 17 00:00:00 2001 From: Reid Kleckner Date: Tue, 14 Feb 2017 21:02:24 +0000 Subject: [PATCH] [BranchFolding] Tail common all identical unreachable blocks Summary: Blocks ending in unreachable are typically cold because they end the program or throw an exception, so merging them with other identical blocks is usually profitable because it reduces the size of cold code. MachineBlockPlacement generally does not arrange to fall through to such blocks, so commoning these blocks will not introduce additional unconditional branches. Reviewers: hans, iteratee, haicheng Subscribers: llvm-commits Differential Revision: https://reviews.llvm.org/D29153 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@295105 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/CodeGen/BranchFolding.cpp | 20 ++++++ test/CodeGen/ARM/tail-opts.ll | 52 +++++++++++++++ test/CodeGen/X86/tail-opts.ll | 119 +++++++++++++++++++++++++++++----- 3 files changed, 175 insertions(+), 16 deletions(-) diff --git a/lib/CodeGen/BranchFolding.cpp b/lib/CodeGen/BranchFolding.cpp index b196bf98d27..d148a9dbac5 100644 --- a/lib/CodeGen/BranchFolding.cpp +++ b/lib/CodeGen/BranchFolding.cpp @@ -523,6 +523,17 @@ static unsigned CountTerminators(MachineBasicBlock *MBB, return NumTerms; } +/// A no successor, non-return block probably ends in unreachable and is cold. +/// Also consider a block that ends in an indirect branch to be a return block, +/// since many targets use plain indirect branches to return. +static bool blockEndsInUnreachable(const MachineBasicBlock *MBB) { + if (!MBB->succ_empty()) + return false; + if (MBB->empty()) + return true; + return !(MBB->back().isReturn() || MBB->back().isIndirectBranch()); +} + /// ProfitableToMerge - Check if two machine basic blocks have a common tail /// and decide if it would be profitable to merge those tails. Return the /// length of the common tail and iterators to the first common instruction @@ -577,6 +588,15 @@ ProfitableToMerge(MachineBasicBlock *MBB1, MachineBasicBlock *MBB2, return true; } + // If these are identical non-return blocks with no successors, merge them. + // Such blocks are typically cold calls to noreturn functions like abort, and + // are unlikely to become a fallthrough target after machine block placement. + // Tail merging these blocks is unlikely to create additional unconditional + // branches, and will reduce the size of this cold code. + if (I1 == MBB1->begin() && I2 == MBB2->begin() && + blockEndsInUnreachable(MBB1) && blockEndsInUnreachable(MBB2)) + return true; + // If one of the blocks can be completely merged and happens to be in // a position where the other could fall through into it, merge any number // of instructions, because it can be done without a branch. diff --git a/test/CodeGen/ARM/tail-opts.ll b/test/CodeGen/ARM/tail-opts.ll index 37e9a4af3be..475b80b3bb0 100644 --- a/test/CodeGen/ARM/tail-opts.ll +++ b/test/CodeGen/ARM/tail-opts.ll @@ -65,3 +65,55 @@ altret: call void @far(i32 1001) ret void } + +; Use alternating abort functions so that the blocks we wish to merge are not +; layout successors during branch folding. + +; CHECK-LABEL: merge_alternating_aborts: +; CHECK-NOT: _abort +; CHECK-NOT: _alt_abort +; CHECK: bxne lr +; CHECK-NOT: _abort +; CHECK-NOT: _alt_abort +; CHECK: LBB{{.*}}: +; CHECK: mov lr, pc +; CHECK: b _alt_abort +; CHECK-NOT: _abort +; CHECK-NOT: _alt_abort +; CHECK: LBB{{.*}}: +; CHECK: mov lr, pc +; CHECK: b _abort +; CHECK-NOT: _abort +; CHECK-NOT: _alt_abort + +declare void @abort() +declare void @alt_abort() + +define void @merge_alternating_aborts() { +entry: + %c1 = call i1 @qux() + br i1 %c1, label %cont1, label %abort1 +abort1: + call void @abort() + unreachable +cont1: + %c2 = call i1 @qux() + br i1 %c2, label %cont2, label %abort2 +abort2: + call void @alt_abort() + unreachable +cont2: + %c3 = call i1 @qux() + br i1 %c3, label %cont3, label %abort3 +abort3: + call void @abort() + unreachable +cont3: + %c4 = call i1 @qux() + br i1 %c4, label %cont4, label %abort4 +abort4: + call void @alt_abort() + unreachable +cont4: + ret void +} diff --git a/test/CodeGen/X86/tail-opts.ll b/test/CodeGen/X86/tail-opts.ll index 12c90c1a5fa..e7797426d89 100644 --- a/test/CodeGen/X86/tail-opts.ll +++ b/test/CodeGen/X86/tail-opts.ll @@ -299,33 +299,35 @@ declare void @func() ; one - One instruction may be tail-duplicated even with optsize. ; CHECK-LABEL: one: -; CHECK: movl $0, XYZ(%rip) -; CHECK: movl $0, XYZ(%rip) +; CHECK: j{{.*}} tail_call_me +; CHECK: j{{.*}} tail_call_me @XYZ = external global i32 -define void @one() nounwind optsize { +declare void @tail_call_me() + +define void @one(i32 %v) nounwind optsize { entry: - %0 = icmp eq i32 undef, 0 + %0 = icmp eq i32 %v, 0 br i1 %0, label %bbx, label %bby bby: - switch i32 undef, label %bb7 [ + switch i32 %v, label %bb7 [ i32 16, label %return ] bb7: - store volatile i32 0, i32* @XYZ - unreachable + tail call void @tail_call_me() + ret void bbx: - switch i32 undef, label %bb12 [ + switch i32 %v, label %bb12 [ i32 128, label %return ] bb12: - store volatile i32 0, i32* @XYZ - unreachable + tail call void @tail_call_me() + ret void return: ret void @@ -414,9 +416,9 @@ return: ; CHECK-LABEL: two_nosize: ; CHECK: movl $0, XYZ(%rip) -; CHECK: movl $1, XYZ(%rip) +; CHECK: jmp tail_call_me ; CHECK: movl $0, XYZ(%rip) -; CHECK: movl $1, XYZ(%rip) +; CHECK: jmp tail_call_me define void @two_nosize() nounwind { entry: @@ -430,8 +432,8 @@ bby: bb7: store volatile i32 0, i32* @XYZ - store volatile i32 1, i32* @XYZ - unreachable + tail call void @tail_call_me() + ret void bbx: switch i32 undef, label %bb12 [ @@ -440,8 +442,8 @@ bbx: bb12: store volatile i32 0, i32* @XYZ - store volatile i32 1, i32* @XYZ - unreachable + tail call void @tail_call_me() + ret void return: ret void @@ -469,3 +471,88 @@ bb.nph: ; preds = %entry for.end: ; preds = %entry ret i64 %varx.0 } + +; We should tail merge small blocks that don't end in a tail call or return +; instruction. Those blocks are typically unreachable and will be placed +; out-of-line after the main return, so we should try to eliminate as many of +; them as possible. + +; CHECK-LABEL: merge_aborts: +; CHECK-NOT: callq abort +; CHECK: ret +; CHECK: callq abort +; CHECK-NOT: callq abort +; CHECK: .Lfunc_end{{.*}}: + +declare void @abort() +define void @merge_aborts() { +entry: + %c1 = call i1 @qux() + br i1 %c1, label %cont1, label %abort1 +abort1: + call void @abort() + unreachable +cont1: + %c2 = call i1 @qux() + br i1 %c2, label %cont2, label %abort2 +abort2: + call void @abort() + unreachable +cont2: + %c3 = call i1 @qux() + br i1 %c3, label %cont3, label %abort3 +abort3: + call void @abort() + unreachable +cont3: + %c4 = call i1 @qux() + br i1 %c4, label %cont4, label %abort4 +abort4: + call void @abort() + unreachable +cont4: + ret void +} + +; Use alternating abort functions so that the blocks we wish to merge are not +; layout successors during branch folding. + +; CHECK-LABEL: merge_alternating_aborts: +; CHECK-NOT: callq abort +; CHECK: ret +; CHECK: callq abort +; CHECK: callq alt_abort +; CHECK-NOT: callq abort +; CHECK-NOT: callq alt_abort +; CHECK: .Lfunc_end{{.*}}: + +declare void @alt_abort() + +define void @merge_alternating_aborts() { +entry: + %c1 = call i1 @qux() + br i1 %c1, label %cont1, label %abort1 +abort1: + call void @abort() + unreachable +cont1: + %c2 = call i1 @qux() + br i1 %c2, label %cont2, label %abort2 +abort2: + call void @alt_abort() + unreachable +cont2: + %c3 = call i1 @qux() + br i1 %c3, label %cont3, label %abort3 +abort3: + call void @abort() + unreachable +cont3: + %c4 = call i1 @qux() + br i1 %c4, label %cont4, label %abort4 +abort4: + call void @alt_abort() + unreachable +cont4: + ret void +} -- 2.40.0