From: Hans Wennborg Date: Tue, 28 May 2019 12:19:38 +0000 (+0000) Subject: Re-commit r357452 (take 2): "SimplifyCFG SinkCommonCodeFromPredecessors: Also sink... X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=afeb42acc3c4295cc6d85e93e129e8f46f306a22;p=llvm Re-commit r357452 (take 2): "SimplifyCFG SinkCommonCodeFromPredecessors: Also sink function calls without used results (PR41259)" This was reverted in r360086 as it was supected of causing mysterious test failures internally. However, it was never concluded that this patch was the root cause. > The code was previously checking that candidates for sinking had exactly > one use or were a store instruction (which can't have uses). This meant > we could sink call instructions only if they had a use. > > That limitation seemed a bit arbitrary, so this patch changes it to > "instruction has zero or one use" which seems more natural and removes > the need to special-case stores. > > Differential revision: https://reviews.llvm.org/D59936 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@361811 91177308-0d34-0410-b5e6-96231b3b80d8 --- diff --git a/lib/Transforms/Utils/SimplifyCFG.cpp b/lib/Transforms/Utils/SimplifyCFG.cpp index 90b552035af..69df6549cb1 100644 --- a/lib/Transforms/Utils/SimplifyCFG.cpp +++ b/lib/Transforms/Utils/SimplifyCFG.cpp @@ -1445,9 +1445,10 @@ HoistTerminator: static bool canSinkInstructions( ArrayRef Insts, DenseMap> &PHIOperands) { - // Prune out obviously bad instructions to move. Any non-store instruction - // must have exactly one use, and we check later that use is by a single, - // common PHI instruction in the successor. + // Prune out obviously bad instructions to move. Each instruction must have + // exactly zero or one use, and we check later that use is by a single, common + // PHI instruction in the successor. + bool HasUse = !Insts.front()->user_empty(); for (auto *I : Insts) { // These instructions may change or break semantics if moved. if (isa(I) || I->isEHPad() || isa(I) || @@ -1461,9 +1462,10 @@ static bool canSinkInstructions( if (C->isInlineAsm()) return false; - // Everything must have only one use too, apart from stores which - // have no uses. - if (!isa(I) && !I->hasOneUse()) + // Each instruction must have zero or one use. + if (HasUse && !I->hasOneUse()) + return false; + if (!HasUse && !I->user_empty()) return false; } @@ -1472,11 +1474,11 @@ static bool canSinkInstructions( if (!I->isSameOperationAs(I0)) return false; - // All instructions in Insts are known to be the same opcode. If they aren't - // stores, check the only user of each is a PHI or in the same block as the - // instruction, because if a user is in the same block as an instruction - // we're contemplating sinking, it must already be determined to be sinkable. - if (!isa(I0)) { + // All instructions in Insts are known to be the same opcode. If they have a + // use, check that the only user is a PHI or in the same block as the + // instruction, because if a user is in the same block as an instruction we're + // contemplating sinking, it must already be determined to be sinkable. + if (HasUse) { auto *PNUse = dyn_cast(*I0->user_begin()); auto *Succ = I0->getParent()->getTerminator()->getSuccessor(0); if (!all_of(Insts, [&PNUse,&Succ](const Instruction *I) -> bool { @@ -1554,7 +1556,7 @@ static bool sinkLastInstruction(ArrayRef Blocks) { // it is slightly over-aggressive - it gets confused by commutative instructions // so double-check it here. Instruction *I0 = Insts.front(); - if (!isa(I0)) { + if (!I0->user_empty()) { auto *PNUse = dyn_cast(*I0->user_begin()); if (!all_of(Insts, [&PNUse](const Instruction *I) -> bool { auto *U = cast(*I->user_begin()); @@ -1612,11 +1614,10 @@ static bool sinkLastInstruction(ArrayRef Blocks) { I0->andIRFlags(I); } - if (!isa(I0)) { + if (!I0->user_empty()) { // canSinkLastInstruction checked that all instructions were used by // one and only one PHI node. Find that now, RAUW it to our common // instruction and nuke it. - assert(I0->hasOneUse()); auto *PN = cast(*I0->user_begin()); PN->replaceAllUsesWith(I0); PN->eraseFromParent(); diff --git a/test/CodeGen/AArch64/max-jump-table.ll b/test/CodeGen/AArch64/max-jump-table.ll index 44dde7b1cd0..f309efe95b5 100644 --- a/test/CodeGen/AArch64/max-jump-table.ll +++ b/test/CodeGen/AArch64/max-jump-table.ll @@ -4,7 +4,7 @@ ; RUN: llc %s -O2 -print-machineinstrs -mtriple=aarch64-linux-gnu -jump-table-density=40 -mcpu=exynos-m1 -o /dev/null 2> %t; FileCheck %s --check-prefixes=CHECK,CHECKM1 < %t ; RUN: llc %s -O2 -print-machineinstrs -mtriple=aarch64-linux-gnu -jump-table-density=40 -mcpu=exynos-m3 -o /dev/null 2> %t; FileCheck %s --check-prefixes=CHECK,CHECKM3 < %t -declare void @ext(i32) +declare void @ext(i32, i32) define i32 @jt1(i32 %a, i32 %b) { entry: @@ -45,23 +45,23 @@ entry: ; CHECKM3-NEXT: %jump-table.0: ; CHECKM3-NOT: %jump-table.1: -bb1: tail call void @ext(i32 0) br label %return -bb2: tail call void @ext(i32 2) br label %return -bb3: tail call void @ext(i32 4) br label %return -bb4: tail call void @ext(i32 6) br label %return -bb5: tail call void @ext(i32 8) br label %return -bb6: tail call void @ext(i32 10) br label %return -bb7: tail call void @ext(i32 12) br label %return -bb8: tail call void @ext(i32 14) br label %return -bb9: tail call void @ext(i32 16) br label %return -bb10: tail call void @ext(i32 18) br label %return -bb11: tail call void @ext(i32 20) br label %return -bb12: tail call void @ext(i32 22) br label %return -bb13: tail call void @ext(i32 24) br label %return -bb14: tail call void @ext(i32 26) br label %return -bb15: tail call void @ext(i32 28) br label %return -bb16: tail call void @ext(i32 30) br label %return -bb17: tail call void @ext(i32 32) br label %return +bb1: tail call void @ext(i32 1, i32 0) br label %return +bb2: tail call void @ext(i32 2, i32 2) br label %return +bb3: tail call void @ext(i32 3, i32 4) br label %return +bb4: tail call void @ext(i32 4, i32 6) br label %return +bb5: tail call void @ext(i32 5, i32 8) br label %return +bb6: tail call void @ext(i32 6, i32 10) br label %return +bb7: tail call void @ext(i32 7, i32 12) br label %return +bb8: tail call void @ext(i32 8, i32 14) br label %return +bb9: tail call void @ext(i32 9, i32 16) br label %return +bb10: tail call void @ext(i32 1, i32 18) br label %return +bb11: tail call void @ext(i32 2, i32 20) br label %return +bb12: tail call void @ext(i32 3, i32 22) br label %return +bb13: tail call void @ext(i32 4, i32 24) br label %return +bb14: tail call void @ext(i32 5, i32 26) br label %return +bb15: tail call void @ext(i32 6, i32 28) br label %return +bb16: tail call void @ext(i32 7, i32 30) br label %return +bb17: tail call void @ext(i32 8, i32 32) br label %return return: ret i32 %b } @@ -91,11 +91,11 @@ entry: ; CHECKM3-NOT: %jump-table.1 ; CHECK-DAG: End machine code for function jt2. -bb1: tail call void @ext(i32 1) br label %return -bb2: tail call void @ext(i32 2) br label %return -bb3: tail call void @ext(i32 3) br label %return -bb4: tail call void @ext(i32 4) br label %return -bb5: tail call void @ext(i32 5) br label %return -bb6: tail call void @ext(i32 6) br label %return +bb1: tail call void @ext(i32 6, i32 1) br label %return +bb2: tail call void @ext(i32 5, i32 2) br label %return +bb3: tail call void @ext(i32 4, i32 3) br label %return +bb4: tail call void @ext(i32 3, i32 4) br label %return +bb5: tail call void @ext(i32 2, i32 5) br label %return +bb6: tail call void @ext(i32 1, i32 6) br label %return return: ret void } diff --git a/test/CodeGen/AArch64/min-jump-table.ll b/test/CodeGen/AArch64/min-jump-table.ll index 7d6d26259af..8d16a4d9d6a 100644 --- a/test/CodeGen/AArch64/min-jump-table.ll +++ b/test/CodeGen/AArch64/min-jump-table.ll @@ -2,7 +2,7 @@ ; RUN: llc %s -O2 -print-machineinstrs -mtriple=aarch64-linux-gnu -jump-table-density=40 -min-jump-table-entries=4 -o /dev/null 2> %t; FileCheck %s --check-prefixes=CHECK,CHECK4 < %t ; RUN: llc %s -O2 -print-machineinstrs -mtriple=aarch64-linux-gnu -jump-table-density=40 -min-jump-table-entries=8 -o /dev/null 2> %t; FileCheck %s --check-prefixes=CHECK,CHECK8 < %t -declare void @ext(i32) +declare void @ext(i32, i32) define i32 @jt2(i32 %a, i32 %b) { entry: @@ -17,8 +17,8 @@ entry: ; CHECK4-NOT: {{^}}Jump Tables: ; CHECK8-NOT: {{^}}Jump Tables: -bb1: tail call void @ext(i32 0) br label %return -bb2: tail call void @ext(i32 2) br label %return +bb1: tail call void @ext(i32 1, i32 0) br label %return +bb2: tail call void @ext(i32 2, i32 2) br label %return return: ret i32 %b } @@ -40,10 +40,10 @@ entry: ; CHECK4-NOT: %jump-table.1: ; CHECK8-NOT: {{^}}Jump Tables: -bb1: tail call void @ext(i32 0) br label %return -bb2: tail call void @ext(i32 2) br label %return -bb3: tail call void @ext(i32 4) br label %return -bb4: tail call void @ext(i32 6) br label %return +bb1: tail call void @ext(i32 1, i32 0) br label %return +bb2: tail call void @ext(i32 3, i32 2) br label %return +bb3: tail call void @ext(i32 4, i32 4) br label %return +bb4: tail call void @ext(i32 5, i32 6) br label %return return: ret i32 %b } @@ -65,14 +65,14 @@ entry: ; CHECK-NEXT: %jump-table.0: ; CHECK-NOT: %jump-table.1: -bb1: tail call void @ext(i32 0) br label %return -bb2: tail call void @ext(i32 2) br label %return -bb3: tail call void @ext(i32 4) br label %return -bb4: tail call void @ext(i32 6) br label %return -bb5: tail call void @ext(i32 8) br label %return -bb6: tail call void @ext(i32 10) br label %return -bb7: tail call void @ext(i32 12) br label %return -bb8: tail call void @ext(i32 14) br label %return +bb1: tail call void @ext(i32 1, i32 0) br label %return +bb2: tail call void @ext(i32 2, i32 2) br label %return +bb3: tail call void @ext(i32 3, i32 4) br label %return +bb4: tail call void @ext(i32 4, i32 6) br label %return +bb5: tail call void @ext(i32 5, i32 8) br label %return +bb6: tail call void @ext(i32 6, i32 10) br label %return +bb7: tail call void @ext(i32 7, i32 12) br label %return +bb8: tail call void @ext(i32 8, i32 14) br label %return return: ret i32 %b } diff --git a/test/CodeGen/AArch64/win64-jumptable.ll b/test/CodeGen/AArch64/win64-jumptable.ll index 4eb86ea663f..6a9752687aa 100644 --- a/test/CodeGen/AArch64/win64-jumptable.ll +++ b/test/CodeGen/AArch64/win64-jumptable.ll @@ -10,43 +10,43 @@ entry: i32 3, label %sw.bb3 ] -sw.bb: ; preds = %entry - tail call void @g(i32 0) #2 +sw.bb: + tail call void @g(i32 0, i32 4) br label %sw.epilog -sw.bb1: ; preds = %entry - tail call void @g(i32 1) #2 +sw.bb1: + tail call void @g(i32 1, i32 5) br label %sw.epilog -sw.bb2: ; preds = %entry - tail call void @g(i32 2) #2 +sw.bb2: + tail call void @g(i32 2, i32 6) br label %sw.epilog -sw.bb3: ; preds = %entry - tail call void @g(i32 3) #2 +sw.bb3: + tail call void @g(i32 3, i32 7) br label %sw.epilog -sw.epilog: ; preds = %entry, %sw.bb3, %sw.bb2, %sw.bb1, %sw.bb - tail call void @g(i32 10) #2 +sw.epilog: + tail call void @g(i32 10, i32 8) ret void } -declare void @g(i32) - -; CHECK: .text -; CHECK: f: -; CHECK: .seh_proc f -; CHECK: b g -; CHECK-NEXT: .p2align 2 -; CHECK-NEXT: .LJTI0_0: -; CHECK: .word .LBB0_2-.LJTI0_0 -; CHECK: .word .LBB0_3-.LJTI0_0 -; CHECK: .word .LBB0_4-.LJTI0_0 -; CHECK: .word .LBB0_5-.LJTI0_0 -; CHECK: .section .xdata,"dr" -; CHECK: .seh_handlerdata -; CHECK: .text -; CHECK: .seh_endproc +declare void @g(i32, i32) + +; CHECK: .text +; CHECK: f: +; CHECK: .seh_proc f +; CHECK: b g +; CHECK-NEXT: .p2align 2 +; CHECK-NEXT: .LJTI0_0: +; CHECK: .word .LBB0_2-.LJTI0_0 +; CHECK: .word .LBB0_3-.LJTI0_0 +; CHECK: .word .LBB0_4-.LJTI0_0 +; CHECK: .word .LBB0_5-.LJTI0_0 +; CHECK: .section .xdata,"dr" +; CHECK: .seh_handlerdata +; CHECK: .text +; CHECK: .seh_endproc ; Check that we can emit an object file with correct unwind info. ; UNWIND: FunctionLength: {{[1-9][0-9]*}} diff --git a/test/CodeGen/ARM/cmpxchg-idioms.ll b/test/CodeGen/ARM/cmpxchg-idioms.ll index 283202f0cc1..1af80e7d0c8 100644 --- a/test/CodeGen/ARM/cmpxchg-idioms.ll +++ b/test/CodeGen/ARM/cmpxchg-idioms.ll @@ -20,14 +20,14 @@ define i32 @test_return(i32* %p, i32 %oldval, i32 %newval) { ; CHECK: [[FAILED]]: ; CHECK-NOT: cmp {{r[0-9]+}}, {{r[0-9]+}} ; CHECK: clrex -; CHECK: dmb ish ; CHECK: movs r0, #0 +; CHECK: dmb ish ; CHECK: bx lr ; CHECK: [[SUCCESS]]: ; CHECK-NOT: cmp {{r[0-9]+}}, {{r[0-9]+}} -; CHECK: dmb ish ; CHECK: movs r0, #1 +; CHECK: dmb ish ; CHECK: bx lr %pair = cmpxchg i32* %p, i32 %oldval, i32 %newval seq_cst seq_cst diff --git a/test/Transforms/SimplifyCFG/sink-common-code.ll b/test/Transforms/SimplifyCFG/sink-common-code.ll index 02c29bd3540..12a3e59cd37 100644 --- a/test/Transforms/SimplifyCFG/sink-common-code.ll +++ b/test/Transforms/SimplifyCFG/sink-common-code.ll @@ -843,6 +843,50 @@ if.end: ; CHECK: insertvalue ; CHECK-NOT: insertvalue + +declare void @baz(i32) + +define void @test_sink_void_calls(i32 %x) { +entry: + switch i32 %x, label %default [ + i32 0, label %bb0 + i32 1, label %bb1 + i32 2, label %bb2 + i32 3, label %bb3 + i32 4, label %bb4 + ] +bb0: + call void @baz(i32 12) + br label %return +bb1: + call void @baz(i32 34) + br label %return +bb2: + call void @baz(i32 56) + br label %return +bb3: + call void @baz(i32 78) + br label %return +bb4: + call void @baz(i32 90) + br label %return +default: + unreachable +return: + ret void + +; Check that the calls get sunk to the return block. +; We would previously not sink calls without uses, see PR41259. +; CHECK-LABEL: @test_sink_void_calls +; CHECK-NOT: call +; CHECK-LABEL: return: +; CHECK: phi +; CHECK: call +; CHECK-NOT: call +; CHECK: ret +} + + ; CHECK: ![[$TBAA]] = !{![[TYPE:[0-9]]], ![[TYPE]], i64 0} ; CHECK: ![[TYPE]] = !{!"float", ![[TEXT:[0-9]]]} ; CHECK: ![[TEXT]] = !{!"an example type tree"}