From: Hans Wennborg <hans@hanshq.net>
Date: Tue, 28 May 2019 12:19:38 +0000 (+0000)
Subject: Re-commit r357452 (take 2): "SimplifyCFG SinkCommonCodeFromPredecessors: Also sink... 
X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=afeb42acc3c4295cc6d85e93e129e8f46f306a22;p=llvm

Re-commit r357452 (take 2): "SimplifyCFG SinkCommonCodeFromPredecessors: Also sink function calls without used results (PR41259)"

This was reverted in r360086 as it was supected of causing mysterious test
failures internally. However, it was never concluded that this patch was the
root cause.

> The code was previously checking that candidates for sinking had exactly
> one use or were a store instruction (which can't have uses). This meant
> we could sink call instructions only if they had a use.
>
> That limitation seemed a bit arbitrary, so this patch changes it to
> "instruction has zero or one use" which seems more natural and removes
> the need to special-case stores.
>
> Differential revision: https://reviews.llvm.org/D59936

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@361811 91177308-0d34-0410-b5e6-96231b3b80d8
---

diff --git a/lib/Transforms/Utils/SimplifyCFG.cpp b/lib/Transforms/Utils/SimplifyCFG.cpp
index 90b552035af..69df6549cb1 100644
--- a/lib/Transforms/Utils/SimplifyCFG.cpp
+++ b/lib/Transforms/Utils/SimplifyCFG.cpp
@@ -1445,9 +1445,10 @@ HoistTerminator:
 static bool canSinkInstructions(
     ArrayRef<Instruction *> Insts,
     DenseMap<Instruction *, SmallVector<Value *, 4>> &PHIOperands) {
-  // Prune out obviously bad instructions to move. Any non-store instruction
-  // must have exactly one use, and we check later that use is by a single,
-  // common PHI instruction in the successor.
+  // Prune out obviously bad instructions to move. Each instruction must have
+  // exactly zero or one use, and we check later that use is by a single, common
+  // PHI instruction in the successor.
+  bool HasUse = !Insts.front()->user_empty();
   for (auto *I : Insts) {
     // These instructions may change or break semantics if moved.
     if (isa<PHINode>(I) || I->isEHPad() || isa<AllocaInst>(I) ||
@@ -1461,9 +1462,10 @@ static bool canSinkInstructions(
       if (C->isInlineAsm())
         return false;
 
-    // Everything must have only one use too, apart from stores which
-    // have no uses.
-    if (!isa<StoreInst>(I) && !I->hasOneUse())
+    // Each instruction must have zero or one use.
+    if (HasUse && !I->hasOneUse())
+      return false;
+    if (!HasUse && !I->user_empty())
       return false;
   }
 
@@ -1472,11 +1474,11 @@ static bool canSinkInstructions(
     if (!I->isSameOperationAs(I0))
       return false;
 
-  // All instructions in Insts are known to be the same opcode. If they aren't
-  // stores, check the only user of each is a PHI or in the same block as the
-  // instruction, because if a user is in the same block as an instruction
-  // we're contemplating sinking, it must already be determined to be sinkable.
-  if (!isa<StoreInst>(I0)) {
+  // All instructions in Insts are known to be the same opcode. If they have a
+  // use, check that the only user is a PHI or in the same block as the
+  // instruction, because if a user is in the same block as an instruction we're
+  // contemplating sinking, it must already be determined to be sinkable.
+  if (HasUse) {
     auto *PNUse = dyn_cast<PHINode>(*I0->user_begin());
     auto *Succ = I0->getParent()->getTerminator()->getSuccessor(0);
     if (!all_of(Insts, [&PNUse,&Succ](const Instruction *I) -> bool {
@@ -1554,7 +1556,7 @@ static bool sinkLastInstruction(ArrayRef<BasicBlock*> Blocks) {
   // it is slightly over-aggressive - it gets confused by commutative instructions
   // so double-check it here.
   Instruction *I0 = Insts.front();
-  if (!isa<StoreInst>(I0)) {
+  if (!I0->user_empty()) {
     auto *PNUse = dyn_cast<PHINode>(*I0->user_begin());
     if (!all_of(Insts, [&PNUse](const Instruction *I) -> bool {
           auto *U = cast<Instruction>(*I->user_begin());
@@ -1612,11 +1614,10 @@ static bool sinkLastInstruction(ArrayRef<BasicBlock*> Blocks) {
       I0->andIRFlags(I);
     }
 
-  if (!isa<StoreInst>(I0)) {
+  if (!I0->user_empty()) {
     // canSinkLastInstruction checked that all instructions were used by
     // one and only one PHI node. Find that now, RAUW it to our common
     // instruction and nuke it.
-    assert(I0->hasOneUse());
     auto *PN = cast<PHINode>(*I0->user_begin());
     PN->replaceAllUsesWith(I0);
     PN->eraseFromParent();
diff --git a/test/CodeGen/AArch64/max-jump-table.ll b/test/CodeGen/AArch64/max-jump-table.ll
index 44dde7b1cd0..f309efe95b5 100644
--- a/test/CodeGen/AArch64/max-jump-table.ll
+++ b/test/CodeGen/AArch64/max-jump-table.ll
@@ -4,7 +4,7 @@
 ; RUN: llc %s -O2 -print-machineinstrs -mtriple=aarch64-linux-gnu -jump-table-density=40 -mcpu=exynos-m1        -o /dev/null 2> %t; FileCheck %s --check-prefixes=CHECK,CHECKM1 < %t
 ; RUN: llc %s -O2 -print-machineinstrs -mtriple=aarch64-linux-gnu -jump-table-density=40 -mcpu=exynos-m3        -o /dev/null 2> %t; FileCheck %s --check-prefixes=CHECK,CHECKM3 < %t
 
-declare void @ext(i32)
+declare void @ext(i32, i32)
 
 define i32 @jt1(i32 %a, i32 %b) {
 entry:
@@ -45,23 +45,23 @@ entry:
 ; CHECKM3-NEXT: %jump-table.0:
 ; CHECKM3-NOT: %jump-table.1:
 
-bb1: tail call void @ext(i32 0) br label %return
-bb2: tail call void @ext(i32 2) br label %return
-bb3: tail call void @ext(i32 4) br label %return
-bb4: tail call void @ext(i32 6) br label %return
-bb5: tail call void @ext(i32 8) br label %return
-bb6: tail call void @ext(i32 10) br label %return
-bb7: tail call void @ext(i32 12) br label %return
-bb8: tail call void @ext(i32 14) br label %return
-bb9: tail call void @ext(i32 16) br label %return
-bb10: tail call void @ext(i32 18) br label %return
-bb11: tail call void @ext(i32 20) br label %return
-bb12: tail call void @ext(i32 22) br label %return
-bb13: tail call void @ext(i32 24) br label %return
-bb14: tail call void @ext(i32 26) br label %return
-bb15: tail call void @ext(i32 28) br label %return
-bb16: tail call void @ext(i32 30) br label %return
-bb17: tail call void @ext(i32 32) br label %return
+bb1: tail call void  @ext(i32 1, i32 0) br label %return
+bb2: tail call void  @ext(i32 2, i32 2) br label %return
+bb3: tail call void  @ext(i32 3, i32 4) br label %return
+bb4: tail call void  @ext(i32 4, i32 6) br label %return
+bb5: tail call void  @ext(i32 5, i32 8) br label %return
+bb6: tail call void  @ext(i32 6, i32 10) br label %return
+bb7: tail call void  @ext(i32 7, i32 12) br label %return
+bb8: tail call void  @ext(i32 8, i32 14) br label %return
+bb9: tail call void  @ext(i32 9, i32 16) br label %return
+bb10: tail call void @ext(i32 1, i32 18) br label %return
+bb11: tail call void @ext(i32 2, i32 20) br label %return
+bb12: tail call void @ext(i32 3, i32 22) br label %return
+bb13: tail call void @ext(i32 4, i32 24) br label %return
+bb14: tail call void @ext(i32 5, i32 26) br label %return
+bb15: tail call void @ext(i32 6, i32 28) br label %return
+bb16: tail call void @ext(i32 7, i32 30) br label %return
+bb17: tail call void @ext(i32 8, i32 32) br label %return
 
 return: ret i32 %b
 }
@@ -91,11 +91,11 @@ entry:
 ; CHECKM3-NOT: %jump-table.1
 ; CHECK-DAG: End machine code for function jt2.
 
-bb1: tail call void @ext(i32 1) br label %return
-bb2: tail call void @ext(i32 2) br label %return
-bb3: tail call void @ext(i32 3) br label %return
-bb4: tail call void @ext(i32 4) br label %return
-bb5: tail call void @ext(i32 5) br label %return
-bb6: tail call void @ext(i32 6) br label %return
+bb1: tail call void @ext(i32 6, i32 1) br label %return
+bb2: tail call void @ext(i32 5, i32 2) br label %return
+bb3: tail call void @ext(i32 4, i32 3) br label %return
+bb4: tail call void @ext(i32 3, i32 4) br label %return
+bb5: tail call void @ext(i32 2, i32 5) br label %return
+bb6: tail call void @ext(i32 1, i32 6) br label %return
 return: ret void
 }
diff --git a/test/CodeGen/AArch64/min-jump-table.ll b/test/CodeGen/AArch64/min-jump-table.ll
index 7d6d26259af..8d16a4d9d6a 100644
--- a/test/CodeGen/AArch64/min-jump-table.ll
+++ b/test/CodeGen/AArch64/min-jump-table.ll
@@ -2,7 +2,7 @@
 ; RUN: llc %s -O2 -print-machineinstrs -mtriple=aarch64-linux-gnu -jump-table-density=40 -min-jump-table-entries=4 -o /dev/null 2> %t; FileCheck %s --check-prefixes=CHECK,CHECK4  < %t
 ; RUN: llc %s -O2 -print-machineinstrs -mtriple=aarch64-linux-gnu -jump-table-density=40 -min-jump-table-entries=8 -o /dev/null 2> %t; FileCheck %s --check-prefixes=CHECK,CHECK8  < %t
 
-declare void @ext(i32)
+declare void @ext(i32, i32)
 
 define i32 @jt2(i32 %a, i32 %b) {
 entry:
@@ -17,8 +17,8 @@ entry:
 ; CHECK4-NOT: {{^}}Jump Tables:
 ; CHECK8-NOT: {{^}}Jump Tables:
 
-bb1: tail call void @ext(i32 0) br label %return
-bb2: tail call void @ext(i32 2) br label %return
+bb1: tail call void @ext(i32 1, i32 0) br label %return
+bb2: tail call void @ext(i32 2, i32 2) br label %return
 
 return: ret i32 %b
 }
@@ -40,10 +40,10 @@ entry:
 ; CHECK4-NOT: %jump-table.1:
 ; CHECK8-NOT: {{^}}Jump Tables:
 
-bb1: tail call void @ext(i32 0) br label %return
-bb2: tail call void @ext(i32 2) br label %return
-bb3: tail call void @ext(i32 4) br label %return
-bb4: tail call void @ext(i32 6) br label %return
+bb1: tail call void @ext(i32 1, i32 0) br label %return
+bb2: tail call void @ext(i32 3, i32 2) br label %return
+bb3: tail call void @ext(i32 4, i32 4) br label %return
+bb4: tail call void @ext(i32 5, i32 6) br label %return
 
 return: ret i32 %b
 }
@@ -65,14 +65,14 @@ entry:
 ; CHECK-NEXT: %jump-table.0:
 ; CHECK-NOT: %jump-table.1:
 
-bb1: tail call void @ext(i32 0) br label %return
-bb2: tail call void @ext(i32 2) br label %return
-bb3: tail call void @ext(i32 4) br label %return
-bb4: tail call void @ext(i32 6) br label %return
-bb5: tail call void @ext(i32 8) br label %return
-bb6: tail call void @ext(i32 10) br label %return
-bb7: tail call void @ext(i32 12) br label %return
-bb8: tail call void @ext(i32 14) br label %return
+bb1: tail call void @ext(i32 1, i32 0) br label %return
+bb2: tail call void @ext(i32 2, i32 2) br label %return
+bb3: tail call void @ext(i32 3, i32 4) br label %return
+bb4: tail call void @ext(i32 4, i32 6) br label %return
+bb5: tail call void @ext(i32 5, i32 8) br label %return
+bb6: tail call void @ext(i32 6, i32 10) br label %return
+bb7: tail call void @ext(i32 7, i32 12) br label %return
+bb8: tail call void @ext(i32 8, i32 14) br label %return
 
 return: ret i32 %b
 }
diff --git a/test/CodeGen/AArch64/win64-jumptable.ll b/test/CodeGen/AArch64/win64-jumptable.ll
index 4eb86ea663f..6a9752687aa 100644
--- a/test/CodeGen/AArch64/win64-jumptable.ll
+++ b/test/CodeGen/AArch64/win64-jumptable.ll
@@ -10,43 +10,43 @@ entry:
     i32 3, label %sw.bb3
   ]
 
-sw.bb:                                            ; preds = %entry
-  tail call void @g(i32 0) #2
+sw.bb:
+  tail call void @g(i32 0, i32 4)
   br label %sw.epilog
 
-sw.bb1:                                           ; preds = %entry
-  tail call void @g(i32 1) #2
+sw.bb1:
+  tail call void @g(i32 1, i32 5)
   br label %sw.epilog
 
-sw.bb2:                                           ; preds = %entry
-  tail call void @g(i32 2) #2
+sw.bb2:
+  tail call void @g(i32 2, i32 6)
   br label %sw.epilog
 
-sw.bb3:                                           ; preds = %entry
-  tail call void @g(i32 3) #2
+sw.bb3:
+  tail call void @g(i32 3, i32 7)
   br label %sw.epilog
 
-sw.epilog:                                        ; preds = %entry, %sw.bb3, %sw.bb2, %sw.bb1, %sw.bb
-  tail call void @g(i32 10) #2
+sw.epilog:
+  tail call void @g(i32 10, i32 8)
   ret void
 }
 
-declare void @g(i32)
-
-; CHECK:		.text
-; CHECK:		f:
-; CHECK:		.seh_proc f
-; CHECK:		b	g
-; CHECK-NEXT:	.p2align	2
-; CHECK-NEXT:	.LJTI0_0:
-; CHECK:		.word	.LBB0_2-.LJTI0_0
-; CHECK:		.word	.LBB0_3-.LJTI0_0
-; CHECK:		.word	.LBB0_4-.LJTI0_0
-; CHECK:		.word	.LBB0_5-.LJTI0_0
-; CHECK:		.section	.xdata,"dr"
-; CHECK:		.seh_handlerdata
-; CHECK:		.text
-; CHECK:		.seh_endproc
+declare void @g(i32, i32)
+
+; CHECK:    .text
+; CHECK:    f:
+; CHECK:    .seh_proc f
+; CHECK:    b g
+; CHECK-NEXT: .p2align  2
+; CHECK-NEXT: .LJTI0_0:
+; CHECK:    .word .LBB0_2-.LJTI0_0
+; CHECK:    .word .LBB0_3-.LJTI0_0
+; CHECK:    .word .LBB0_4-.LJTI0_0
+; CHECK:    .word .LBB0_5-.LJTI0_0
+; CHECK:    .section  .xdata,"dr"
+; CHECK:    .seh_handlerdata
+; CHECK:    .text
+; CHECK:    .seh_endproc
 
 ; Check that we can emit an object file with correct unwind info.
 ; UNWIND: FunctionLength: {{[1-9][0-9]*}}
diff --git a/test/CodeGen/ARM/cmpxchg-idioms.ll b/test/CodeGen/ARM/cmpxchg-idioms.ll
index 283202f0cc1..1af80e7d0c8 100644
--- a/test/CodeGen/ARM/cmpxchg-idioms.ll
+++ b/test/CodeGen/ARM/cmpxchg-idioms.ll
@@ -20,14 +20,14 @@ define i32 @test_return(i32* %p, i32 %oldval, i32 %newval) {
 ; CHECK: [[FAILED]]:
 ; CHECK-NOT: cmp {{r[0-9]+}}, {{r[0-9]+}}
 ; CHECK: clrex
-; CHECK: dmb ish
 ; CHECK: movs r0, #0
+; CHECK: dmb ish
 ; CHECK: bx lr
 
 ; CHECK: [[SUCCESS]]:
 ; CHECK-NOT: cmp {{r[0-9]+}}, {{r[0-9]+}}
-; CHECK: dmb ish
 ; CHECK: movs r0, #1
+; CHECK: dmb ish
 ; CHECK: bx lr
 
   %pair = cmpxchg i32* %p, i32 %oldval, i32 %newval seq_cst seq_cst
diff --git a/test/Transforms/SimplifyCFG/sink-common-code.ll b/test/Transforms/SimplifyCFG/sink-common-code.ll
index 02c29bd3540..12a3e59cd37 100644
--- a/test/Transforms/SimplifyCFG/sink-common-code.ll
+++ b/test/Transforms/SimplifyCFG/sink-common-code.ll
@@ -843,6 +843,50 @@ if.end:
 ; CHECK: insertvalue
 ; CHECK-NOT: insertvalue
 
+
+declare void @baz(i32)
+
+define void @test_sink_void_calls(i32 %x) {
+entry:
+  switch i32 %x, label %default [
+    i32 0, label %bb0
+    i32 1, label %bb1
+    i32 2, label %bb2
+    i32 3, label %bb3
+    i32 4, label %bb4
+  ]
+bb0:
+  call void @baz(i32 12)
+  br label %return
+bb1:
+  call void @baz(i32 34)
+  br label %return
+bb2:
+  call void @baz(i32 56)
+  br label %return
+bb3:
+  call void @baz(i32 78)
+  br label %return
+bb4:
+  call void @baz(i32 90)
+  br label %return
+default:
+  unreachable
+return:
+  ret void
+
+; Check that the calls get sunk to the return block.
+; We would previously not sink calls without uses, see PR41259.
+; CHECK-LABEL: @test_sink_void_calls
+; CHECK-NOT: call
+; CHECK-LABEL: return:
+; CHECK: phi
+; CHECK: call
+; CHECK-NOT: call
+; CHECK: ret
+}
+
+
 ; CHECK: ![[$TBAA]] = !{![[TYPE:[0-9]]], ![[TYPE]], i64 0}
 ; CHECK: ![[TYPE]] = !{!"float", ![[TEXT:[0-9]]]}
 ; CHECK: ![[TEXT]] = !{!"an example type tree"}