From d1f4a55c8c94c757e0335683f93c08f7c8fe0568 Mon Sep 17 00:00:00 2001
From: Roman Lebedev <lebedev.ri@gmail.com>
Date: Mon, 16 Sep 2019 16:18:24 +0000
Subject: [PATCH] [SimplifyCFG] FoldTwoEntryPHINode(): consider *total*
 speculation cost, not per-BB cost
MIME-Version: 1.0
Content-Type: text/plain; charset=utf8
Content-Transfer-Encoding: 8bit

Summary:
Previously, if the threshold was 2, we were willing to speculatively
execute 2 cheap instructions in both basic blocks (thus we were willing
to speculatively execute cost = 4), but weren't willing to speculate
when one BB had 3 instructions and other one had no instructions,
even though that would have a total cost of 3.

This looks inconsistent to me.
I don't think `cmov`-like instructions will start executing
until both of their inputs are available: https://godbolt.org/z/zgHePf
So I don't see why the existing behavior is the correct one.

Also, let's add its own `cl::opt` for this threshold,
with default=4, so it is not stricter than the previous threshold:
it will still allow folding when there are 2 BBs, each with cost=2.
And since the logic has changed, it will also allow to fold when
one BB has cost=3 and other cost=1, or there is only one BB with cost=4.

This is an alternative solution to D65148:
This fix is mainly motivated by `signbit-like-value-extension.ll` test.
That pattern comes up in JPEG decoding, see e.g.
`Figure F.12 – Extending the sign bit of a decoded value in V`
of `ITU T.81` (JPEG specification).
That branch is not predictable, and it is within the innermost loop,
so the fact that that pattern ends up being stuck with a branch
instead of `select` (i.e. `CMOV` for x86) is unlikely to be beneficial.

This has great results on the final assembly (vanilla test-suite + RawSpeed): (metric pass - D67240)
| metric                                 |     old |     new | delta |      % |
| x86-mi-counting.NumMachineFunctions    |   37720 |   37721 |     1 |  0.00% |
| x86-mi-counting.NumMachineBasicBlocks  |  773545 |  771181 | -2364 | -0.31% |
| x86-mi-counting.NumMachineInstructions | 7488843 | 7486442 | -2401 | -0.03% |
| x86-mi-counting.NumUncondBR            |  135770 |  135543 |  -227 | -0.17% |
| x86-mi-counting.NumCondBR              |  423753 |  422187 | -1566 | -0.37% |
| x86-mi-counting.NumCMOV                |   24815 |   25731 |   916 |  3.69% |
| x86-mi-counting.NumVecBlend            |      17 |      17 |     0 |  0.00% |

We significantly decrease basic block count, notably decrease instruction count,
significantly decrease branch count and very significantly increase `cmov` count.

Performance-wise, unsurprisingly, this has great effect on
target RawSpeed benchmark. I'm seeing 5 **major** improvements:
```
Benchmark                                                                                             Time             CPU      Time Old      Time New       CPU Old       CPU New
----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
Samsung/NX3000/_3184416.SRW/threads:8/process_time/real_time_pvalue                                 0.0000          0.0000      U Test, Repetitions: 49 vs 49
Samsung/NX3000/_3184416.SRW/threads:8/process_time/real_time_mean                                  -0.3064         -0.3064      226.9913      157.4452      226.9800      157.4384
Samsung/NX3000/_3184416.SRW/threads:8/process_time/real_time_median                                -0.3057         -0.3057      226.8407      157.4926      226.8282      157.4828
Samsung/NX3000/_3184416.SRW/threads:8/process_time/real_time_stddev                                -0.4985         -0.4954        0.3051        0.1530        0.3040        0.1534
Kodak/DCS760C/86L57188.DCR/threads:8/process_time/real_time_pvalue                                  0.0000          0.0000      U Test, Repetitions: 49 vs 49
Kodak/DCS760C/86L57188.DCR/threads:8/process_time/real_time_mean                                   -0.1747         -0.1747       80.4787       66.4227       80.4771       66.4146
Kodak/DCS760C/86L57188.DCR/threads:8/process_time/real_time_median                                 -0.1742         -0.1743       80.4686       66.4542       80.4690       66.4436
Kodak/DCS760C/86L57188.DCR/threads:8/process_time/real_time_stddev                                 +0.6089         +0.5797        0.0670        0.1078        0.0673        0.1062
Sony/DSLR-A230/DSC08026.ARW/threads:8/process_time/real_time_pvalue                                 0.0000          0.0000      U Test, Repetitions: 49 vs 49
Sony/DSLR-A230/DSC08026.ARW/threads:8/process_time/real_time_mean                                  -0.1598         -0.1598      171.6996      144.2575      171.6915      144.2538
Sony/DSLR-A230/DSC08026.ARW/threads:8/process_time/real_time_median                                -0.1598         -0.1597      171.7109      144.2755      171.7018      144.2766
Sony/DSLR-A230/DSC08026.ARW/threads:8/process_time/real_time_stddev                                +0.4024         +0.3850        0.0847        0.1187        0.0848        0.1175
Canon/EOS 77D/IMG_4049.CR2/threads:8/process_time/real_time_pvalue                                  0.0000          0.0000      U Test, Repetitions: 49 vs 49
Canon/EOS 77D/IMG_4049.CR2/threads:8/process_time/real_time_mean                                   -0.0550         -0.0551      280.3046      264.8800      280.3017      264.8559
Canon/EOS 77D/IMG_4049.CR2/threads:8/process_time/real_time_median                                 -0.0554         -0.0554      280.2628      264.7360      280.2574      264.7297
Canon/EOS 77D/IMG_4049.CR2/threads:8/process_time/real_time_stddev                                 +0.7005         +0.7041        0.2779        0.4725        0.2775        0.4729
Canon/EOS 5DS/2K4A9929.CR2/threads:8/process_time/real_time_pvalue                                  0.0000          0.0000      U Test, Repetitions: 49 vs 49
Canon/EOS 5DS/2K4A9929.CR2/threads:8/process_time/real_time_mean                                   -0.0354         -0.0355      316.7396      305.5208      316.7342      305.4890
Canon/EOS 5DS/2K4A9929.CR2/threads:8/process_time/real_time_median                                 -0.0354         -0.0356      316.6969      305.4798      316.6917      305.4324
Canon/EOS 5DS/2K4A9929.CR2/threads:8/process_time/real_time_stddev                                 +0.0493         +0.0330        0.3562        0.3737        0.3563        0.3681
```

That being said, it's always best-effort, so there will likely
be cases where this worsens things.

Reviewers: efriedma, craig.topper, dmgreen, jmolloy, fhahn, Carrot, hfinkel, chandlerc

Reviewed By: jmolloy

Subscribers: xbolva00, hiraditya, llvm-commits

Tags: #llvm

Differential Revision: https://reviews.llvm.org/D67318

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@372009 91177308-0d34-0410-b5e6-96231b3b80d8
---
 lib/Transforms/Utils/SimplifyCFG.cpp          |  27 +-
 .../IndVarSimplify/loop_evaluate_1.ll         |   6 +-
 test/Transforms/PGOProfile/chr.ll             |  28 +-
 test/Transforms/SimplifyCFG/PhiEliminate3.ll  |  64 +---
 .../Transforms/SimplifyCFG/SpeculativeExec.ll |   6 +-
 .../SimplifyCFG/X86/speculate-cttz-ctlz.ll    | 304 ++++--------------
 .../SimplifyCFG/X86/switch_to_lookup_table.ll |   6 +-
 test/Transforms/SimplifyCFG/safe-abs.ll       |   6 +-
 .../SimplifyCFG/safe-low-bit-extract.ll       |   6 +-
 .../signbit-like-value-extension.ll           |   6 +-
 test/Transforms/SimplifyCFG/speculate-math.ll |  23 +-
 11 files changed, 109 insertions(+), 373 deletions(-)

diff --git a/lib/Transforms/Utils/SimplifyCFG.cpp b/lib/Transforms/Utils/SimplifyCFG.cpp
index 2e0a07f105d..b7b644c981c 100644
--- a/lib/Transforms/Utils/SimplifyCFG.cpp
+++ b/lib/Transforms/Utils/SimplifyCFG.cpp
@@ -94,6 +94,12 @@ static cl::opt<unsigned> PHINodeFoldingThreshold(
     cl::desc(
         "Control the amount of phi node folding to perform (default = 2)"));
 
+static cl::opt<unsigned> TwoEntryPHINodeFoldingThreshold(
+    "two-entry-phi-node-folding-threshold", cl::Hidden, cl::init(4),
+    cl::desc("Control the maximal total instruction cost that we are willing "
+             "to speculatively execute to fold a 2-entry PHI node into a "
+             "select (default = 4)"));
+
 static cl::opt<bool> DupRet(
     "simplifycfg-dup-ret", cl::Hidden, cl::init(false),
     cl::desc("Duplicate return instructions into unconditional branches"));
@@ -332,7 +338,7 @@ static unsigned ComputeSpeculationCost(const User *I,
 /// CostRemaining, false is returned and CostRemaining is undefined.
 static bool DominatesMergePoint(Value *V, BasicBlock *BB,
                                 SmallPtrSetImpl<Instruction *> &AggressiveInsts,
-                                unsigned &CostRemaining,
+                                int &BudgetRemaining,
                                 const TargetTransformInfo &TTI,
                                 unsigned Depth = 0) {
   // It is possible to hit a zero-cost cycle (phi/gep instructions for example),
@@ -375,7 +381,7 @@ static bool DominatesMergePoint(Value *V, BasicBlock *BB,
   if (!isSafeToSpeculativelyExecute(I))
     return false;
 
-  unsigned Cost = ComputeSpeculationCost(I, TTI);
+  BudgetRemaining -= ComputeSpeculationCost(I, TTI);
 
   // Allow exactly one instruction to be speculated regardless of its cost
   // (as long as it is safe to do so).
@@ -383,17 +389,14 @@ static bool DominatesMergePoint(Value *V, BasicBlock *BB,
   // or other expensive operation. The speculation of an expensive instruction
   // is expected to be undone in CodeGenPrepare if the speculation has not
   // enabled further IR optimizations.
-  if (Cost > CostRemaining &&
+  if (BudgetRemaining < 0 &&
       (!SpeculateOneExpensiveInst || !AggressiveInsts.empty() || Depth > 0))
     return false;
 
-  // Avoid unsigned wrap.
-  CostRemaining = (Cost > CostRemaining) ? 0 : CostRemaining - Cost;
-
   // Okay, we can only really hoist these out if their operands do
   // not take us over the cost threshold.
   for (User::op_iterator i = I->op_begin(), e = I->op_end(); i != e; ++i)
-    if (!DominatesMergePoint(*i, BB, AggressiveInsts, CostRemaining, TTI,
+    if (!DominatesMergePoint(*i, BB, AggressiveInsts, BudgetRemaining, TTI,
                              Depth + 1))
       return false;
   // Okay, it's safe to do this!  Remember this instruction.
@@ -2322,10 +2325,8 @@ static bool FoldTwoEntryPHINode(PHINode *PN, const TargetTransformInfo &TTI,
   // instructions.  While we are at it, keep track of the instructions
   // that need to be moved to the dominating block.
   SmallPtrSet<Instruction *, 4> AggressiveInsts;
-  unsigned MaxCostVal0 = PHINodeFoldingThreshold,
-           MaxCostVal1 = PHINodeFoldingThreshold;
-  MaxCostVal0 *= TargetTransformInfo::TCC_Basic;
-  MaxCostVal1 *= TargetTransformInfo::TCC_Basic;
+  int BudgetRemaining =
+      TwoEntryPHINodeFoldingThreshold * TargetTransformInfo::TCC_Basic;
 
   for (BasicBlock::iterator II = BB->begin(); isa<PHINode>(II);) {
     PHINode *PN = cast<PHINode>(II++);
@@ -2336,9 +2337,9 @@ static bool FoldTwoEntryPHINode(PHINode *PN, const TargetTransformInfo &TTI,
     }
 
     if (!DominatesMergePoint(PN->getIncomingValue(0), BB, AggressiveInsts,
-                             MaxCostVal0, TTI) ||
+                             BudgetRemaining, TTI) ||
         !DominatesMergePoint(PN->getIncomingValue(1), BB, AggressiveInsts,
-                             MaxCostVal1, TTI))
+                             BudgetRemaining, TTI))
       return false;
   }
 
diff --git a/test/Transforms/IndVarSimplify/loop_evaluate_1.ll b/test/Transforms/IndVarSimplify/loop_evaluate_1.ll
index a1e1d013124..6edacc17518 100644
--- a/test/Transforms/IndVarSimplify/loop_evaluate_1.ll
+++ b/test/Transforms/IndVarSimplify/loop_evaluate_1.ll
@@ -28,14 +28,10 @@ define i32 @test2(i32 %arg) {
 ; CHECK-LABEL: @test2(
 ; CHECK-NEXT:  bb:
 ; CHECK-NEXT:    [[TMP:%.*]] = icmp ugt i32 [[ARG:%.*]], 10
-; CHECK-NEXT:    br i1 [[TMP]], label [[BB1_PREHEADER:%.*]], label [[BB7:%.*]]
-; CHECK:       bb1.preheader:
 ; CHECK-NEXT:    [[TMP0:%.*]] = add i32 [[ARG]], -11
 ; CHECK-NEXT:    [[TMP1:%.*]] = lshr i32 [[TMP0]], 1
 ; CHECK-NEXT:    [[TMP2:%.*]] = add nuw i32 [[TMP1]], 1
-; CHECK-NEXT:    br label [[BB7]]
-; CHECK:       bb7:
-; CHECK-NEXT:    [[TMP8:%.*]] = phi i32 [ 0, [[BB:%.*]] ], [ [[TMP2]], [[BB1_PREHEADER]] ]
+; CHECK-NEXT:    [[TMP8:%.*]] = select i1 [[TMP]], i32 [[TMP2]], i32 0
 ; CHECK-NEXT:    ret i32 [[TMP8]]
 ;
 bb:
diff --git a/test/Transforms/PGOProfile/chr.ll b/test/Transforms/PGOProfile/chr.ll
index 838b7f6af82..0221a4fd000 100644
--- a/test/Transforms/PGOProfile/chr.ll
+++ b/test/Transforms/PGOProfile/chr.ll
@@ -468,15 +468,14 @@ define i32 @test_chr_5(i32* %i, i32 %sum0) !prof !14 {
 ; CHECK-NEXT:    [[SUM2_NONCHR:%.*]] = select i1 [[TMP11]], i32 [[SUM1_NONCHR]], i32 [[TMP12]], !prof !16
 ; CHECK-NEXT:    [[TMP13:%.*]] = and i32 [[TMP0]], 4
 ; CHECK-NEXT:    [[TMP14:%.*]] = icmp eq i32 [[TMP13]], 0
-; CHECK-NEXT:    br i1 [[TMP14]], label [[BB3]], label [[BB1_NONCHR:%.*]], !prof !16
-; CHECK:       bb1.nonchr:
 ; CHECK-NEXT:    [[TMP15:%.*]] = and i32 [[TMP0]], 8
 ; CHECK-NEXT:    [[TMP16:%.*]] = icmp eq i32 [[TMP15]], 0
-; CHECK-NEXT:    [[SUM4_NONCHR_V:%.*]] = select i1 [[TMP16]], i32 44, i32 88, !prof !16
+; CHECK-NEXT:    [[SUM4_NONCHR_V:%.*]] = select i1 [[TMP16]], i32 44, i32 88
 ; CHECK-NEXT:    [[SUM4_NONCHR:%.*]] = add i32 [[SUM2_NONCHR]], [[SUM4_NONCHR_V]]
+; CHECK-NEXT:    [[SUM5_NONCHR:%.*]] = select i1 [[TMP14]], i32 [[SUM2_NONCHR]], i32 [[SUM4_NONCHR]], !prof !16
 ; CHECK-NEXT:    br label [[BB3]]
 ; CHECK:       bb3:
-; CHECK-NEXT:    [[SUM6:%.*]] = phi i32 [ [[TMP4]], [[BB0]] ], [ [[SUM0]], [[ENTRY_SPLIT_NONCHR]] ], [ [[SUM2_NONCHR]], [[BB0_NONCHR]] ], [ [[SUM4_NONCHR]], [[BB1_NONCHR]] ]
+; CHECK-NEXT:    [[SUM6:%.*]] = phi i32 [ [[TMP4]], [[BB0]] ], [ [[SUM0]], [[ENTRY_SPLIT_NONCHR]] ], [ [[SUM5_NONCHR]], [[BB0_NONCHR]] ]
 ; CHECK-NEXT:    ret i32 [[SUM6]]
 ;
 entry:
@@ -569,15 +568,14 @@ define i32 @test_chr_5_1(i32* %i, i32 %sum0) !prof !14 {
 ; CHECK-NEXT:    [[SUM2_NONCHR:%.*]] = select i1 [[TMP14]], i32 [[SUM1_NONCHR]], i32 [[TMP15]], !prof !16
 ; CHECK-NEXT:    [[TMP16:%.*]] = and i32 [[SUM0]], 4
 ; CHECK-NEXT:    [[TMP17:%.*]] = icmp eq i32 [[TMP16]], 0
-; CHECK-NEXT:    br i1 [[TMP17]], label [[BB3]], label [[BB1_NONCHR:%.*]], !prof !16
-; CHECK:       bb1.nonchr:
 ; CHECK-NEXT:    [[TMP18:%.*]] = and i32 [[TMP0]], 8
 ; CHECK-NEXT:    [[TMP19:%.*]] = icmp eq i32 [[TMP18]], 0
-; CHECK-NEXT:    [[SUM4_NONCHR_V:%.*]] = select i1 [[TMP19]], i32 44, i32 88, !prof !16
+; CHECK-NEXT:    [[SUM4_NONCHR_V:%.*]] = select i1 [[TMP19]], i32 44, i32 88
 ; CHECK-NEXT:    [[SUM4_NONCHR:%.*]] = add i32 [[SUM2_NONCHR]], [[SUM4_NONCHR_V]]
+; CHECK-NEXT:    [[SUM5_NONCHR:%.*]] = select i1 [[TMP17]], i32 [[SUM2_NONCHR]], i32 [[SUM4_NONCHR]], !prof !16
 ; CHECK-NEXT:    br label [[BB3]]
 ; CHECK:       bb3:
-; CHECK-NEXT:    [[SUM6:%.*]] = phi i32 [ [[TMP7]], [[BB0]] ], [ [[SUM0]], [[ENTRY_SPLIT_NONCHR]] ], [ [[SUM2_NONCHR]], [[BB0_NONCHR]] ], [ [[SUM4_NONCHR]], [[BB1_NONCHR]] ]
+; CHECK-NEXT:    [[SUM6:%.*]] = phi i32 [ [[TMP7]], [[BB0]] ], [ [[SUM0]], [[ENTRY_SPLIT_NONCHR]] ], [ [[SUM5_NONCHR]], [[BB0_NONCHR]] ]
 ; CHECK-NEXT:    ret i32 [[SUM6]]
 ;
 entry:
@@ -667,15 +665,14 @@ define i32 @test_chr_6(i32* %i, i32* %j, i32 %sum0) !prof !14 {
 ; CHECK-NEXT:    [[SUM2_NONCHR:%.*]] = select i1 [[V4_NONCHR]], i32 [[SUM0]], i32 [[V8_NONCHR]], !prof !16
 ; CHECK-NEXT:    [[V9_NONCHR:%.*]] = and i32 [[J0]], 4
 ; CHECK-NEXT:    [[V10_NONCHR:%.*]] = icmp eq i32 [[V9_NONCHR]], 0
-; CHECK-NEXT:    br i1 [[V10_NONCHR]], label [[BB3]], label [[BB1_NONCHR:%.*]], !prof !16
-; CHECK:       bb1.nonchr:
 ; CHECK-NEXT:    [[V11_NONCHR:%.*]] = and i32 [[I0]], 8
 ; CHECK-NEXT:    [[V12_NONCHR:%.*]] = icmp eq i32 [[V11_NONCHR]], 0
-; CHECK-NEXT:    [[SUM4_NONCHR_V:%.*]] = select i1 [[V12_NONCHR]], i32 44, i32 88, !prof !16
+; CHECK-NEXT:    [[SUM4_NONCHR_V:%.*]] = select i1 [[V12_NONCHR]], i32 44, i32 88
 ; CHECK-NEXT:    [[SUM4_NONCHR:%.*]] = add i32 [[SUM2_NONCHR]], [[SUM4_NONCHR_V]]
+; CHECK-NEXT:    [[SUM5_NONCHR:%.*]] = select i1 [[V10_NONCHR]], i32 [[SUM2_NONCHR]], i32 [[SUM4_NONCHR]], !prof !16
 ; CHECK-NEXT:    br label [[BB3]]
 ; CHECK:       bb3:
-; CHECK-NEXT:    [[SUM6:%.*]] = phi i32 [ [[V13]], [[BB0]] ], [ [[SUM0]], [[ENTRY_SPLIT_NONCHR]] ], [ [[SUM2_NONCHR]], [[BB0_NONCHR]] ], [ [[SUM4_NONCHR]], [[BB1_NONCHR]] ]
+; CHECK-NEXT:    [[SUM6:%.*]] = phi i32 [ [[V13]], [[BB0]] ], [ [[SUM0]], [[ENTRY_SPLIT_NONCHR]] ], [ [[SUM5_NONCHR]], [[BB0_NONCHR]] ]
 ; CHECK-NEXT:    ret i32 [[SUM6]]
 ;
 entry:
@@ -1754,15 +1751,14 @@ define i32 @test_chr_19(i32* %i, i32 %sum0) !prof !14 {
 ; CHECK-NEXT:    [[TMP8:%.*]] = icmp eq i32 [[TMP7]], 0
 ; CHECK-NEXT:    [[TMP9:%.*]] = add i32 [[SUM0]], 85
 ; CHECK-NEXT:    [[SUM2_NONCHR:%.*]] = select i1 [[TMP8]], i32 [[SUM0]], i32 [[TMP9]], !prof !16
-; CHECK-NEXT:    br i1 [[TMP8]], label [[BB3]], label [[BB1_NONCHR:%.*]], !prof !16
-; CHECK:       bb1.nonchr:
 ; CHECK-NEXT:    [[TMP10:%.*]] = and i32 [[TMP0]], 8
 ; CHECK-NEXT:    [[TMP11:%.*]] = icmp eq i32 [[TMP10]], 0
-; CHECK-NEXT:    [[SUM4_NONCHR_V:%.*]] = select i1 [[TMP11]], i32 44, i32 88, !prof !16
+; CHECK-NEXT:    [[SUM4_NONCHR_V:%.*]] = select i1 [[TMP11]], i32 44, i32 88
 ; CHECK-NEXT:    [[SUM4_NONCHR:%.*]] = add i32 [[SUM2_NONCHR]], [[SUM4_NONCHR_V]]
+; CHECK-NEXT:    [[SUM5_NONCHR:%.*]] = select i1 [[TMP8]], i32 [[SUM2_NONCHR]], i32 [[SUM4_NONCHR]], !prof !16
 ; CHECK-NEXT:    br label [[BB3]]
 ; CHECK:       bb3:
-; CHECK-NEXT:    [[SUM6:%.*]] = phi i32 [ [[TMP4]], [[BB0]] ], [ [[SUM0]], [[ENTRY_SPLIT_NONCHR]] ], [ [[SUM2_NONCHR]], [[BB0_NONCHR]] ], [ [[SUM4_NONCHR]], [[BB1_NONCHR]] ]
+; CHECK-NEXT:    [[SUM6:%.*]] = phi i32 [ [[TMP4]], [[BB0]] ], [ [[SUM0]], [[ENTRY_SPLIT_NONCHR]] ], [ [[SUM5_NONCHR]], [[BB0_NONCHR]] ]
 ; CHECK-NEXT:    ret i32 [[SUM6]]
 ;
 entry:
diff --git a/test/Transforms/SimplifyCFG/PhiEliminate3.ll b/test/Transforms/SimplifyCFG/PhiEliminate3.ll
index 5f498d62d7e..4d7435ce3c2 100644
--- a/test/Transforms/SimplifyCFG/PhiEliminate3.ll
+++ b/test/Transforms/SimplifyCFG/PhiEliminate3.ll
@@ -7,55 +7,21 @@
 ; with various folding thresholds
 
 define i32 @test(i1 %a, i1 %b, i32 %i, i32 %j, i32 %k) {
-; CHECK-ONE-LABEL: @test(
-; CHECK-ONE-NEXT:  entry:
-; CHECK-ONE-NEXT:    br i1 [[A:%.*]], label [[M:%.*]], label [[O:%.*]]
-; CHECK-ONE:       O:
-; CHECK-ONE-NEXT:    br i1 [[B:%.*]], label [[P:%.*]], label [[Q:%.*]]
-; CHECK-ONE:       P:
-; CHECK-ONE-NEXT:    [[IAJ:%.*]] = add i32 [[I:%.*]], [[J:%.*]]
-; CHECK-ONE-NEXT:    [[IAJAK:%.*]] = add i32 [[IAJ]], [[K:%.*]]
-; CHECK-ONE-NEXT:    br label [[N:%.*]]
-; CHECK-ONE:       Q:
-; CHECK-ONE-NEXT:    [[IXJ:%.*]] = xor i32 [[I]], [[J]]
-; CHECK-ONE-NEXT:    [[IXJXK:%.*]] = xor i32 [[IXJ]], [[K]]
-; CHECK-ONE-NEXT:    br label [[N]]
-; CHECK-ONE:       N:
-; CHECK-ONE-NEXT:    [[WP:%.*]] = phi i32 [ [[IAJAK]], [[P]] ], [ [[IXJXK]], [[Q]] ]
-; CHECK-ONE-NEXT:    [[WP2:%.*]] = add i32 [[WP]], [[WP]]
-; CHECK-ONE-NEXT:    br label [[M]]
-; CHECK-ONE:       M:
-; CHECK-ONE-NEXT:    [[W:%.*]] = phi i32 [ [[WP2]], [[N]] ], [ 2, [[ENTRY:%.*]] ]
-; CHECK-ONE-NEXT:    [[R:%.*]] = add i32 [[W]], 1
-; CHECK-ONE-NEXT:    ret i32 [[R]]
-;
-; CHECK-TWO-LABEL: @test(
-; CHECK-TWO-NEXT:  entry:
-; CHECK-TWO-NEXT:    br i1 [[A:%.*]], label [[M:%.*]], label [[O:%.*]]
-; CHECK-TWO:       O:
-; CHECK-TWO-NEXT:    [[IAJ:%.*]] = add i32 [[I:%.*]], [[J:%.*]]
-; CHECK-TWO-NEXT:    [[IAJAK:%.*]] = add i32 [[IAJ]], [[K:%.*]]
-; CHECK-TWO-NEXT:    [[IXJ:%.*]] = xor i32 [[I]], [[J]]
-; CHECK-TWO-NEXT:    [[IXJXK:%.*]] = xor i32 [[IXJ]], [[K]]
-; CHECK-TWO-NEXT:    [[WP:%.*]] = select i1 [[B:%.*]], i32 [[IAJAK]], i32 [[IXJXK]]
-; CHECK-TWO-NEXT:    [[WP2:%.*]] = add i32 [[WP]], [[WP]]
-; CHECK-TWO-NEXT:    br label [[M]]
-; CHECK-TWO:       M:
-; CHECK-TWO-NEXT:    [[W:%.*]] = phi i32 [ [[WP2]], [[O]] ], [ 2, [[ENTRY:%.*]] ]
-; CHECK-TWO-NEXT:    [[R:%.*]] = add i32 [[W]], 1
-; CHECK-TWO-NEXT:    ret i32 [[R]]
-;
-; CHECK-SEVEN-LABEL: @test(
-; CHECK-SEVEN-NEXT:  entry:
-; CHECK-SEVEN-NEXT:    [[IAJ:%.*]] = add i32 [[I:%.*]], [[J:%.*]]
-; CHECK-SEVEN-NEXT:    [[IAJAK:%.*]] = add i32 [[IAJ]], [[K:%.*]]
-; CHECK-SEVEN-NEXT:    [[IXJ:%.*]] = xor i32 [[I]], [[J]]
-; CHECK-SEVEN-NEXT:    [[IXJXK:%.*]] = xor i32 [[IXJ]], [[K]]
-; CHECK-SEVEN-NEXT:    [[WP:%.*]] = select i1 [[B:%.*]], i32 [[IAJAK]], i32 [[IXJXK]]
-; CHECK-SEVEN-NEXT:    [[WP2:%.*]] = add i32 [[WP]], [[WP]]
-; CHECK-SEVEN-NEXT:    [[W:%.*]] = select i1 [[A:%.*]], i32 2, i32 [[WP2]]
-; CHECK-SEVEN-NEXT:    [[R:%.*]] = add i32 [[W]], 1
-; CHECK-SEVEN-NEXT:    ret i32 [[R]]
+; ALL-LABEL: @test(
+; ALL-NEXT:  entry:
+; ALL-NEXT:    br i1 [[A:%.*]], label [[M:%.*]], label [[O:%.*]]
+; ALL:       O:
+; ALL-NEXT:    [[IAJ:%.*]] = add i32 [[I:%.*]], [[J:%.*]]
+; ALL-NEXT:    [[IAJAK:%.*]] = add i32 [[IAJ]], [[K:%.*]]
+; ALL-NEXT:    [[IXJ:%.*]] = xor i32 [[I]], [[J]]
+; ALL-NEXT:    [[IXJXK:%.*]] = xor i32 [[IXJ]], [[K]]
+; ALL-NEXT:    [[WP:%.*]] = select i1 [[B:%.*]], i32 [[IAJAK]], i32 [[IXJXK]]
+; ALL-NEXT:    [[WP2:%.*]] = add i32 [[WP]], [[WP]]
+; ALL-NEXT:    br label [[M]]
+; ALL:       M:
+; ALL-NEXT:    [[W:%.*]] = phi i32 [ [[WP2]], [[O]] ], [ 2, [[ENTRY:%.*]] ]
+; ALL-NEXT:    [[R:%.*]] = add i32 [[W]], 1
+; ALL-NEXT:    ret i32 [[R]]
 ;
 entry:
   br i1 %a, label %M, label %O
diff --git a/test/Transforms/SimplifyCFG/SpeculativeExec.ll b/test/Transforms/SimplifyCFG/SpeculativeExec.ll
index e9e1d73a7b6..c21edd0c2ad 100644
--- a/test/Transforms/SimplifyCFG/SpeculativeExec.ll
+++ b/test/Transforms/SimplifyCFG/SpeculativeExec.ll
@@ -8,14 +8,10 @@ define i32 @test1(i32 %a, i32 %b, i32 %c) nounwind  {
 ; CHECK-LABEL: @test1(
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[TMP1:%.*]] = icmp eq i32 [[B:%.*]], 0
-; CHECK-NEXT:    br i1 [[TMP1]], label [[BB1:%.*]], label [[BB3:%.*]]
-; CHECK:       bb1:
 ; CHECK-NEXT:    [[TMP2:%.*]] = icmp sgt i32 [[C:%.*]], 1
 ; CHECK-NEXT:    [[TMP3:%.*]] = add i32 [[A:%.*]], 1
 ; CHECK-NEXT:    [[SPEC_SELECT:%.*]] = select i1 [[TMP2]], i32 [[TMP3]], i32 [[A]]
-; CHECK-NEXT:    br label [[BB3]]
-; CHECK:       bb3:
-; CHECK-NEXT:    [[TMP4:%.*]] = phi i32 [ [[B]], [[ENTRY:%.*]] ], [ [[SPEC_SELECT]], [[BB1]] ]
+; CHECK-NEXT:    [[TMP4:%.*]] = select i1 [[TMP1]], i32 [[SPEC_SELECT]], i32 [[B]]
 ; CHECK-NEXT:    [[TMP5:%.*]] = sub i32 [[TMP4]], 1
 ; CHECK-NEXT:    ret i32 [[TMP5]]
 ;
diff --git a/test/Transforms/SimplifyCFG/X86/speculate-cttz-ctlz.ll b/test/Transforms/SimplifyCFG/X86/speculate-cttz-ctlz.ll
index 11ba3984f35..3a2f067a2ee 100644
--- a/test/Transforms/SimplifyCFG/X86/speculate-cttz-ctlz.ll
+++ b/test/Transforms/SimplifyCFG/X86/speculate-cttz-ctlz.ll
@@ -223,37 +223,13 @@ cond.end:                                         ; preds = %entry, %cond.true
 ; for the target.
 
 define i64 @test1e(i32 %x) {
-; BMI-LABEL: @test1e(
-; BMI-NEXT:  entry:
-; BMI-NEXT:    [[TOBOOL:%.*]] = icmp eq i32 [[X:%.*]], 0
-; BMI-NEXT:    [[TMP0:%.*]] = tail call i32 @llvm.cttz.i32(i32 [[X]], i1 true)
-; BMI-NEXT:    [[PHITMP2:%.*]] = zext i32 [[TMP0]] to i64
-; BMI-NEXT:    [[COND:%.*]] = select i1 [[TOBOOL]], i64 32, i64 [[PHITMP2]]
-; BMI-NEXT:    ret i64 [[COND]]
-;
-; LZCNT-LABEL: @test1e(
-; LZCNT-NEXT:  entry:
-; LZCNT-NEXT:    [[TOBOOL:%.*]] = icmp eq i32 [[X:%.*]], 0
-; LZCNT-NEXT:    br i1 [[TOBOOL]], label [[COND_END:%.*]], label [[COND_TRUE:%.*]]
-; LZCNT:       cond.true:
-; LZCNT-NEXT:    [[TMP0:%.*]] = tail call i32 @llvm.cttz.i32(i32 [[X]], i1 true)
-; LZCNT-NEXT:    [[PHITMP2:%.*]] = zext i32 [[TMP0]] to i64
-; LZCNT-NEXT:    br label [[COND_END]]
-; LZCNT:       cond.end:
-; LZCNT-NEXT:    [[COND:%.*]] = phi i64 [ [[PHITMP2]], [[COND_TRUE]] ], [ 32, [[ENTRY:%.*]] ]
-; LZCNT-NEXT:    ret i64 [[COND]]
-;
-; GENERIC-LABEL: @test1e(
-; GENERIC-NEXT:  entry:
-; GENERIC-NEXT:    [[TOBOOL:%.*]] = icmp eq i32 [[X:%.*]], 0
-; GENERIC-NEXT:    br i1 [[TOBOOL]], label [[COND_END:%.*]], label [[COND_TRUE:%.*]]
-; GENERIC:       cond.true:
-; GENERIC-NEXT:    [[TMP0:%.*]] = tail call i32 @llvm.cttz.i32(i32 [[X]], i1 true)
-; GENERIC-NEXT:    [[PHITMP2:%.*]] = zext i32 [[TMP0]] to i64
-; GENERIC-NEXT:    br label [[COND_END]]
-; GENERIC:       cond.end:
-; GENERIC-NEXT:    [[COND:%.*]] = phi i64 [ [[PHITMP2]], [[COND_TRUE]] ], [ 32, [[ENTRY:%.*]] ]
-; GENERIC-NEXT:    ret i64 [[COND]]
+; ALL-LABEL: @test1e(
+; ALL-NEXT:  entry:
+; ALL-NEXT:    [[TOBOOL:%.*]] = icmp eq i32 [[X:%.*]], 0
+; ALL-NEXT:    [[TMP0:%.*]] = tail call i32 @llvm.cttz.i32(i32 [[X]], i1 true)
+; ALL-NEXT:    [[PHITMP2:%.*]] = zext i32 [[TMP0]] to i64
+; ALL-NEXT:    [[COND:%.*]] = select i1 [[TOBOOL]], i64 32, i64 [[PHITMP2]]
+; ALL-NEXT:    ret i64 [[COND]]
 ;
 entry:
   %tobool = icmp eq i32 %x, 0
@@ -270,37 +246,13 @@ cond.end:                                         ; preds = %entry, %cond.true
 }
 
 define i32 @test2e(i64 %x) {
-; BMI-LABEL: @test2e(
-; BMI-NEXT:  entry:
-; BMI-NEXT:    [[TOBOOL:%.*]] = icmp eq i64 [[X:%.*]], 0
-; BMI-NEXT:    [[TMP0:%.*]] = tail call i64 @llvm.cttz.i64(i64 [[X]], i1 true)
-; BMI-NEXT:    [[CAST:%.*]] = trunc i64 [[TMP0]] to i32
-; BMI-NEXT:    [[COND:%.*]] = select i1 [[TOBOOL]], i32 64, i32 [[CAST]]
-; BMI-NEXT:    ret i32 [[COND]]
-;
-; LZCNT-LABEL: @test2e(
-; LZCNT-NEXT:  entry:
-; LZCNT-NEXT:    [[TOBOOL:%.*]] = icmp eq i64 [[X:%.*]], 0
-; LZCNT-NEXT:    br i1 [[TOBOOL]], label [[COND_END:%.*]], label [[COND_TRUE:%.*]]
-; LZCNT:       cond.true:
-; LZCNT-NEXT:    [[TMP0:%.*]] = tail call i64 @llvm.cttz.i64(i64 [[X]], i1 true)
-; LZCNT-NEXT:    [[CAST:%.*]] = trunc i64 [[TMP0]] to i32
-; LZCNT-NEXT:    br label [[COND_END]]
-; LZCNT:       cond.end:
-; LZCNT-NEXT:    [[COND:%.*]] = phi i32 [ [[CAST]], [[COND_TRUE]] ], [ 64, [[ENTRY:%.*]] ]
-; LZCNT-NEXT:    ret i32 [[COND]]
-;
-; GENERIC-LABEL: @test2e(
-; GENERIC-NEXT:  entry:
-; GENERIC-NEXT:    [[TOBOOL:%.*]] = icmp eq i64 [[X:%.*]], 0
-; GENERIC-NEXT:    br i1 [[TOBOOL]], label [[COND_END:%.*]], label [[COND_TRUE:%.*]]
-; GENERIC:       cond.true:
-; GENERIC-NEXT:    [[TMP0:%.*]] = tail call i64 @llvm.cttz.i64(i64 [[X]], i1 true)
-; GENERIC-NEXT:    [[CAST:%.*]] = trunc i64 [[TMP0]] to i32
-; GENERIC-NEXT:    br label [[COND_END]]
-; GENERIC:       cond.end:
-; GENERIC-NEXT:    [[COND:%.*]] = phi i32 [ [[CAST]], [[COND_TRUE]] ], [ 64, [[ENTRY:%.*]] ]
-; GENERIC-NEXT:    ret i32 [[COND]]
+; ALL-LABEL: @test2e(
+; ALL-NEXT:  entry:
+; ALL-NEXT:    [[TOBOOL:%.*]] = icmp eq i64 [[X:%.*]], 0
+; ALL-NEXT:    [[TMP0:%.*]] = tail call i64 @llvm.cttz.i64(i64 [[X]], i1 true)
+; ALL-NEXT:    [[CAST:%.*]] = trunc i64 [[TMP0]] to i32
+; ALL-NEXT:    [[COND:%.*]] = select i1 [[TOBOOL]], i32 64, i32 [[CAST]]
+; ALL-NEXT:    ret i32 [[COND]]
 ;
 entry:
   %tobool = icmp eq i64 %x, 0
@@ -317,37 +269,13 @@ cond.end:                                         ; preds = %entry, %cond.true
 }
 
 define i64 @test3e(i32 %x) {
-; BMI-LABEL: @test3e(
-; BMI-NEXT:  entry:
-; BMI-NEXT:    [[TOBOOL:%.*]] = icmp eq i32 [[X:%.*]], 0
-; BMI-NEXT:    br i1 [[TOBOOL]], label [[COND_END:%.*]], label [[COND_TRUE:%.*]]
-; BMI:       cond.true:
-; BMI-NEXT:    [[TMP0:%.*]] = tail call i32 @llvm.ctlz.i32(i32 [[X]], i1 true)
-; BMI-NEXT:    [[PHITMP2:%.*]] = zext i32 [[TMP0]] to i64
-; BMI-NEXT:    br label [[COND_END]]
-; BMI:       cond.end:
-; BMI-NEXT:    [[COND:%.*]] = phi i64 [ [[PHITMP2]], [[COND_TRUE]] ], [ 32, [[ENTRY:%.*]] ]
-; BMI-NEXT:    ret i64 [[COND]]
-;
-; LZCNT-LABEL: @test3e(
-; LZCNT-NEXT:  entry:
-; LZCNT-NEXT:    [[TOBOOL:%.*]] = icmp eq i32 [[X:%.*]], 0
-; LZCNT-NEXT:    [[TMP0:%.*]] = tail call i32 @llvm.ctlz.i32(i32 [[X]], i1 true)
-; LZCNT-NEXT:    [[PHITMP2:%.*]] = zext i32 [[TMP0]] to i64
-; LZCNT-NEXT:    [[COND:%.*]] = select i1 [[TOBOOL]], i64 32, i64 [[PHITMP2]]
-; LZCNT-NEXT:    ret i64 [[COND]]
-;
-; GENERIC-LABEL: @test3e(
-; GENERIC-NEXT:  entry:
-; GENERIC-NEXT:    [[TOBOOL:%.*]] = icmp eq i32 [[X:%.*]], 0
-; GENERIC-NEXT:    br i1 [[TOBOOL]], label [[COND_END:%.*]], label [[COND_TRUE:%.*]]
-; GENERIC:       cond.true:
-; GENERIC-NEXT:    [[TMP0:%.*]] = tail call i32 @llvm.ctlz.i32(i32 [[X]], i1 true)
-; GENERIC-NEXT:    [[PHITMP2:%.*]] = zext i32 [[TMP0]] to i64
-; GENERIC-NEXT:    br label [[COND_END]]
-; GENERIC:       cond.end:
-; GENERIC-NEXT:    [[COND:%.*]] = phi i64 [ [[PHITMP2]], [[COND_TRUE]] ], [ 32, [[ENTRY:%.*]] ]
-; GENERIC-NEXT:    ret i64 [[COND]]
+; ALL-LABEL: @test3e(
+; ALL-NEXT:  entry:
+; ALL-NEXT:    [[TOBOOL:%.*]] = icmp eq i32 [[X:%.*]], 0
+; ALL-NEXT:    [[TMP0:%.*]] = tail call i32 @llvm.ctlz.i32(i32 [[X]], i1 true)
+; ALL-NEXT:    [[PHITMP2:%.*]] = zext i32 [[TMP0]] to i64
+; ALL-NEXT:    [[COND:%.*]] = select i1 [[TOBOOL]], i64 32, i64 [[PHITMP2]]
+; ALL-NEXT:    ret i64 [[COND]]
 ;
 entry:
   %tobool = icmp eq i32 %x, 0
@@ -364,37 +292,13 @@ cond.end:                                         ; preds = %entry, %cond.true
 }
 
 define i32 @test4e(i64 %x) {
-; BMI-LABEL: @test4e(
-; BMI-NEXT:  entry:
-; BMI-NEXT:    [[TOBOOL:%.*]] = icmp eq i64 [[X:%.*]], 0
-; BMI-NEXT:    br i1 [[TOBOOL]], label [[COND_END:%.*]], label [[COND_TRUE:%.*]]
-; BMI:       cond.true:
-; BMI-NEXT:    [[TMP0:%.*]] = tail call i64 @llvm.ctlz.i64(i64 [[X]], i1 true)
-; BMI-NEXT:    [[CAST:%.*]] = trunc i64 [[TMP0]] to i32
-; BMI-NEXT:    br label [[COND_END]]
-; BMI:       cond.end:
-; BMI-NEXT:    [[COND:%.*]] = phi i32 [ [[CAST]], [[COND_TRUE]] ], [ 64, [[ENTRY:%.*]] ]
-; BMI-NEXT:    ret i32 [[COND]]
-;
-; LZCNT-LABEL: @test4e(
-; LZCNT-NEXT:  entry:
-; LZCNT-NEXT:    [[TOBOOL:%.*]] = icmp eq i64 [[X:%.*]], 0
-; LZCNT-NEXT:    [[TMP0:%.*]] = tail call i64 @llvm.ctlz.i64(i64 [[X]], i1 true)
-; LZCNT-NEXT:    [[CAST:%.*]] = trunc i64 [[TMP0]] to i32
-; LZCNT-NEXT:    [[COND:%.*]] = select i1 [[TOBOOL]], i32 64, i32 [[CAST]]
-; LZCNT-NEXT:    ret i32 [[COND]]
-;
-; GENERIC-LABEL: @test4e(
-; GENERIC-NEXT:  entry:
-; GENERIC-NEXT:    [[TOBOOL:%.*]] = icmp eq i64 [[X:%.*]], 0
-; GENERIC-NEXT:    br i1 [[TOBOOL]], label [[COND_END:%.*]], label [[COND_TRUE:%.*]]
-; GENERIC:       cond.true:
-; GENERIC-NEXT:    [[TMP0:%.*]] = tail call i64 @llvm.ctlz.i64(i64 [[X]], i1 true)
-; GENERIC-NEXT:    [[CAST:%.*]] = trunc i64 [[TMP0]] to i32
-; GENERIC-NEXT:    br label [[COND_END]]
-; GENERIC:       cond.end:
-; GENERIC-NEXT:    [[COND:%.*]] = phi i32 [ [[CAST]], [[COND_TRUE]] ], [ 64, [[ENTRY:%.*]] ]
-; GENERIC-NEXT:    ret i32 [[COND]]
+; ALL-LABEL: @test4e(
+; ALL-NEXT:  entry:
+; ALL-NEXT:    [[TOBOOL:%.*]] = icmp eq i64 [[X:%.*]], 0
+; ALL-NEXT:    [[TMP0:%.*]] = tail call i64 @llvm.ctlz.i64(i64 [[X]], i1 true)
+; ALL-NEXT:    [[CAST:%.*]] = trunc i64 [[TMP0]] to i32
+; ALL-NEXT:    [[COND:%.*]] = select i1 [[TOBOOL]], i32 64, i32 [[CAST]]
+; ALL-NEXT:    ret i32 [[COND]]
 ;
 entry:
   %tobool = icmp eq i64 %x, 0
@@ -411,37 +315,13 @@ cond.end:                                         ; preds = %entry, %cond.true
 }
 
 define i16 @test5e(i64 %x) {
-; BMI-LABEL: @test5e(
-; BMI-NEXT:  entry:
-; BMI-NEXT:    [[TOBOOL:%.*]] = icmp eq i64 [[X:%.*]], 0
-; BMI-NEXT:    br i1 [[TOBOOL]], label [[COND_END:%.*]], label [[COND_TRUE:%.*]]
-; BMI:       cond.true:
-; BMI-NEXT:    [[TMP0:%.*]] = tail call i64 @llvm.ctlz.i64(i64 [[X]], i1 true)
-; BMI-NEXT:    [[CAST:%.*]] = trunc i64 [[TMP0]] to i16
-; BMI-NEXT:    br label [[COND_END]]
-; BMI:       cond.end:
-; BMI-NEXT:    [[COND:%.*]] = phi i16 [ [[CAST]], [[COND_TRUE]] ], [ 64, [[ENTRY:%.*]] ]
-; BMI-NEXT:    ret i16 [[COND]]
-;
-; LZCNT-LABEL: @test5e(
-; LZCNT-NEXT:  entry:
-; LZCNT-NEXT:    [[TOBOOL:%.*]] = icmp eq i64 [[X:%.*]], 0
-; LZCNT-NEXT:    [[TMP0:%.*]] = tail call i64 @llvm.ctlz.i64(i64 [[X]], i1 true)
-; LZCNT-NEXT:    [[CAST:%.*]] = trunc i64 [[TMP0]] to i16
-; LZCNT-NEXT:    [[COND:%.*]] = select i1 [[TOBOOL]], i16 64, i16 [[CAST]]
-; LZCNT-NEXT:    ret i16 [[COND]]
-;
-; GENERIC-LABEL: @test5e(
-; GENERIC-NEXT:  entry:
-; GENERIC-NEXT:    [[TOBOOL:%.*]] = icmp eq i64 [[X:%.*]], 0
-; GENERIC-NEXT:    br i1 [[TOBOOL]], label [[COND_END:%.*]], label [[COND_TRUE:%.*]]
-; GENERIC:       cond.true:
-; GENERIC-NEXT:    [[TMP0:%.*]] = tail call i64 @llvm.ctlz.i64(i64 [[X]], i1 true)
-; GENERIC-NEXT:    [[CAST:%.*]] = trunc i64 [[TMP0]] to i16
-; GENERIC-NEXT:    br label [[COND_END]]
-; GENERIC:       cond.end:
-; GENERIC-NEXT:    [[COND:%.*]] = phi i16 [ [[CAST]], [[COND_TRUE]] ], [ 64, [[ENTRY:%.*]] ]
-; GENERIC-NEXT:    ret i16 [[COND]]
+; ALL-LABEL: @test5e(
+; ALL-NEXT:  entry:
+; ALL-NEXT:    [[TOBOOL:%.*]] = icmp eq i64 [[X:%.*]], 0
+; ALL-NEXT:    [[TMP0:%.*]] = tail call i64 @llvm.ctlz.i64(i64 [[X]], i1 true)
+; ALL-NEXT:    [[CAST:%.*]] = trunc i64 [[TMP0]] to i16
+; ALL-NEXT:    [[COND:%.*]] = select i1 [[TOBOOL]], i16 64, i16 [[CAST]]
+; ALL-NEXT:    ret i16 [[COND]]
 ;
 entry:
   %tobool = icmp eq i64 %x, 0
@@ -458,37 +338,13 @@ cond.end:                                         ; preds = %entry, %cond.true
 }
 
 define i16 @test6e(i32 %x) {
-; BMI-LABEL: @test6e(
-; BMI-NEXT:  entry:
-; BMI-NEXT:    [[TOBOOL:%.*]] = icmp eq i32 [[X:%.*]], 0
-; BMI-NEXT:    br i1 [[TOBOOL]], label [[COND_END:%.*]], label [[COND_TRUE:%.*]]
-; BMI:       cond.true:
-; BMI-NEXT:    [[TMP0:%.*]] = tail call i32 @llvm.ctlz.i32(i32 [[X]], i1 true)
-; BMI-NEXT:    [[CAST:%.*]] = trunc i32 [[TMP0]] to i16
-; BMI-NEXT:    br label [[COND_END]]
-; BMI:       cond.end:
-; BMI-NEXT:    [[COND:%.*]] = phi i16 [ [[CAST]], [[COND_TRUE]] ], [ 32, [[ENTRY:%.*]] ]
-; BMI-NEXT:    ret i16 [[COND]]
-;
-; LZCNT-LABEL: @test6e(
-; LZCNT-NEXT:  entry:
-; LZCNT-NEXT:    [[TOBOOL:%.*]] = icmp eq i32 [[X:%.*]], 0
-; LZCNT-NEXT:    [[TMP0:%.*]] = tail call i32 @llvm.ctlz.i32(i32 [[X]], i1 true)
-; LZCNT-NEXT:    [[CAST:%.*]] = trunc i32 [[TMP0]] to i16
-; LZCNT-NEXT:    [[COND:%.*]] = select i1 [[TOBOOL]], i16 32, i16 [[CAST]]
-; LZCNT-NEXT:    ret i16 [[COND]]
-;
-; GENERIC-LABEL: @test6e(
-; GENERIC-NEXT:  entry:
-; GENERIC-NEXT:    [[TOBOOL:%.*]] = icmp eq i32 [[X:%.*]], 0
-; GENERIC-NEXT:    br i1 [[TOBOOL]], label [[COND_END:%.*]], label [[COND_TRUE:%.*]]
-; GENERIC:       cond.true:
-; GENERIC-NEXT:    [[TMP0:%.*]] = tail call i32 @llvm.ctlz.i32(i32 [[X]], i1 true)
-; GENERIC-NEXT:    [[CAST:%.*]] = trunc i32 [[TMP0]] to i16
-; GENERIC-NEXT:    br label [[COND_END]]
-; GENERIC:       cond.end:
-; GENERIC-NEXT:    [[COND:%.*]] = phi i16 [ [[CAST]], [[COND_TRUE]] ], [ 32, [[ENTRY:%.*]] ]
-; GENERIC-NEXT:    ret i16 [[COND]]
+; ALL-LABEL: @test6e(
+; ALL-NEXT:  entry:
+; ALL-NEXT:    [[TOBOOL:%.*]] = icmp eq i32 [[X:%.*]], 0
+; ALL-NEXT:    [[TMP0:%.*]] = tail call i32 @llvm.ctlz.i32(i32 [[X]], i1 true)
+; ALL-NEXT:    [[CAST:%.*]] = trunc i32 [[TMP0]] to i16
+; ALL-NEXT:    [[COND:%.*]] = select i1 [[TOBOOL]], i16 32, i16 [[CAST]]
+; ALL-NEXT:    ret i16 [[COND]]
 ;
 entry:
   %tobool = icmp eq i32 %x, 0
@@ -505,37 +361,13 @@ cond.end:                                         ; preds = %entry, %cond.true
 }
 
 define i16 @test7e(i64 %x) {
-; BMI-LABEL: @test7e(
-; BMI-NEXT:  entry:
-; BMI-NEXT:    [[TOBOOL:%.*]] = icmp eq i64 [[X:%.*]], 0
-; BMI-NEXT:    [[TMP0:%.*]] = tail call i64 @llvm.cttz.i64(i64 [[X]], i1 true)
-; BMI-NEXT:    [[CAST:%.*]] = trunc i64 [[TMP0]] to i16
-; BMI-NEXT:    [[COND:%.*]] = select i1 [[TOBOOL]], i16 64, i16 [[CAST]]
-; BMI-NEXT:    ret i16 [[COND]]
-;
-; LZCNT-LABEL: @test7e(
-; LZCNT-NEXT:  entry:
-; LZCNT-NEXT:    [[TOBOOL:%.*]] = icmp eq i64 [[X:%.*]], 0
-; LZCNT-NEXT:    br i1 [[TOBOOL]], label [[COND_END:%.*]], label [[COND_TRUE:%.*]]
-; LZCNT:       cond.true:
-; LZCNT-NEXT:    [[TMP0:%.*]] = tail call i64 @llvm.cttz.i64(i64 [[X]], i1 true)
-; LZCNT-NEXT:    [[CAST:%.*]] = trunc i64 [[TMP0]] to i16
-; LZCNT-NEXT:    br label [[COND_END]]
-; LZCNT:       cond.end:
-; LZCNT-NEXT:    [[COND:%.*]] = phi i16 [ [[CAST]], [[COND_TRUE]] ], [ 64, [[ENTRY:%.*]] ]
-; LZCNT-NEXT:    ret i16 [[COND]]
-;
-; GENERIC-LABEL: @test7e(
-; GENERIC-NEXT:  entry:
-; GENERIC-NEXT:    [[TOBOOL:%.*]] = icmp eq i64 [[X:%.*]], 0
-; GENERIC-NEXT:    br i1 [[TOBOOL]], label [[COND_END:%.*]], label [[COND_TRUE:%.*]]
-; GENERIC:       cond.true:
-; GENERIC-NEXT:    [[TMP0:%.*]] = tail call i64 @llvm.cttz.i64(i64 [[X]], i1 true)
-; GENERIC-NEXT:    [[CAST:%.*]] = trunc i64 [[TMP0]] to i16
-; GENERIC-NEXT:    br label [[COND_END]]
-; GENERIC:       cond.end:
-; GENERIC-NEXT:    [[COND:%.*]] = phi i16 [ [[CAST]], [[COND_TRUE]] ], [ 64, [[ENTRY:%.*]] ]
-; GENERIC-NEXT:    ret i16 [[COND]]
+; ALL-LABEL: @test7e(
+; ALL-NEXT:  entry:
+; ALL-NEXT:    [[TOBOOL:%.*]] = icmp eq i64 [[X:%.*]], 0
+; ALL-NEXT:    [[TMP0:%.*]] = tail call i64 @llvm.cttz.i64(i64 [[X]], i1 true)
+; ALL-NEXT:    [[CAST:%.*]] = trunc i64 [[TMP0]] to i16
+; ALL-NEXT:    [[COND:%.*]] = select i1 [[TOBOOL]], i16 64, i16 [[CAST]]
+; ALL-NEXT:    ret i16 [[COND]]
 ;
 entry:
   %tobool = icmp eq i64 %x, 0
@@ -552,37 +384,13 @@ cond.end:                                         ; preds = %entry, %cond.true
 }
 
 define i16 @test8e(i32 %x) {
-; BMI-LABEL: @test8e(
-; BMI-NEXT:  entry:
-; BMI-NEXT:    [[TOBOOL:%.*]] = icmp eq i32 [[X:%.*]], 0
-; BMI-NEXT:    [[TMP0:%.*]] = tail call i32 @llvm.cttz.i32(i32 [[X]], i1 true)
-; BMI-NEXT:    [[CAST:%.*]] = trunc i32 [[TMP0]] to i16
-; BMI-NEXT:    [[COND:%.*]] = select i1 [[TOBOOL]], i16 32, i16 [[CAST]]
-; BMI-NEXT:    ret i16 [[COND]]
-;
-; LZCNT-LABEL: @test8e(
-; LZCNT-NEXT:  entry:
-; LZCNT-NEXT:    [[TOBOOL:%.*]] = icmp eq i32 [[X:%.*]], 0
-; LZCNT-NEXT:    br i1 [[TOBOOL]], label [[COND_END:%.*]], label [[COND_TRUE:%.*]]
-; LZCNT:       cond.true:
-; LZCNT-NEXT:    [[TMP0:%.*]] = tail call i32 @llvm.cttz.i32(i32 [[X]], i1 true)
-; LZCNT-NEXT:    [[CAST:%.*]] = trunc i32 [[TMP0]] to i16
-; LZCNT-NEXT:    br label [[COND_END]]
-; LZCNT:       cond.end:
-; LZCNT-NEXT:    [[COND:%.*]] = phi i16 [ [[CAST]], [[COND_TRUE]] ], [ 32, [[ENTRY:%.*]] ]
-; LZCNT-NEXT:    ret i16 [[COND]]
-;
-; GENERIC-LABEL: @test8e(
-; GENERIC-NEXT:  entry:
-; GENERIC-NEXT:    [[TOBOOL:%.*]] = icmp eq i32 [[X:%.*]], 0
-; GENERIC-NEXT:    br i1 [[TOBOOL]], label [[COND_END:%.*]], label [[COND_TRUE:%.*]]
-; GENERIC:       cond.true:
-; GENERIC-NEXT:    [[TMP0:%.*]] = tail call i32 @llvm.cttz.i32(i32 [[X]], i1 true)
-; GENERIC-NEXT:    [[CAST:%.*]] = trunc i32 [[TMP0]] to i16
-; GENERIC-NEXT:    br label [[COND_END]]
-; GENERIC:       cond.end:
-; GENERIC-NEXT:    [[COND:%.*]] = phi i16 [ [[CAST]], [[COND_TRUE]] ], [ 32, [[ENTRY:%.*]] ]
-; GENERIC-NEXT:    ret i16 [[COND]]
+; ALL-LABEL: @test8e(
+; ALL-NEXT:  entry:
+; ALL-NEXT:    [[TOBOOL:%.*]] = icmp eq i32 [[X:%.*]], 0
+; ALL-NEXT:    [[TMP0:%.*]] = tail call i32 @llvm.cttz.i32(i32 [[X]], i1 true)
+; ALL-NEXT:    [[CAST:%.*]] = trunc i32 [[TMP0]] to i16
+; ALL-NEXT:    [[COND:%.*]] = select i1 [[TOBOOL]], i16 32, i16 [[CAST]]
+; ALL-NEXT:    ret i16 [[COND]]
 ;
 entry:
   %tobool = icmp eq i32 %x, 0
diff --git a/test/Transforms/SimplifyCFG/X86/switch_to_lookup_table.ll b/test/Transforms/SimplifyCFG/X86/switch_to_lookup_table.ll
index c77438974b2..597b5b969a7 100644
--- a/test/Transforms/SimplifyCFG/X86/switch_to_lookup_table.ll
+++ b/test/Transforms/SimplifyCFG/X86/switch_to_lookup_table.ll
@@ -1437,14 +1437,10 @@ define i32 @no_reuse_cmp2(i32 %x, i32 %y) {
 ; CHECK-LABEL: @no_reuse_cmp2(
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[EC:%.*]] = icmp ne i32 [[Y:%.*]], 0
-; CHECK-NEXT:    br i1 [[EC]], label [[SWITCH_ENTRY:%.*]], label [[SW_EPILOG:%.*]]
-; CHECK:       switch.entry:
 ; CHECK-NEXT:    [[TMP0:%.*]] = icmp ult i32 [[X:%.*]], 4
 ; CHECK-NEXT:    [[SWITCH_OFFSET:%.*]] = add i32 [[X]], 10
 ; CHECK-NEXT:    [[SPEC_SELECT:%.*]] = select i1 [[TMP0]], i32 [[SWITCH_OFFSET]], i32 0
-; CHECK-NEXT:    br label [[SW_EPILOG]]
-; CHECK:       sw.epilog:
-; CHECK-NEXT:    [[R_0:%.*]] = phi i32 [ 100, [[ENTRY:%.*]] ], [ [[SPEC_SELECT]], [[SWITCH_ENTRY]] ]
+; CHECK-NEXT:    [[R_0:%.*]] = select i1 [[EC]], i32 [[SPEC_SELECT]], i32 100
 ; CHECK-NEXT:    [[CMP:%.*]] = icmp eq i32 [[R_0]], 0
 ; CHECK-NEXT:    [[DOTR_0:%.*]] = select i1 [[CMP]], i32 100, i32 [[R_0]]
 ; CHECK-NEXT:    ret i32 [[DOTR_0]]
diff --git a/test/Transforms/SimplifyCFG/safe-abs.ll b/test/Transforms/SimplifyCFG/safe-abs.ll
index 550fcefdde3..6d8028f8d94 100644
--- a/test/Transforms/SimplifyCFG/safe-abs.ll
+++ b/test/Transforms/SimplifyCFG/safe-abs.ll
@@ -8,14 +8,10 @@ define i32 @abs_with_clamp(i32 %arg) {
 ; CHECK-LABEL: @abs_with_clamp(
 ; CHECK-NEXT:  begin:
 ; CHECK-NEXT:    [[IS_POSITIVE:%.*]] = icmp sgt i32 [[ARG:%.*]], 0
-; CHECK-NEXT:    br i1 [[IS_POSITIVE]], label [[END:%.*]], label [[NEGATIVE:%.*]]
-; CHECK:       negative:
 ; CHECK-NEXT:    [[IS_INT_MIN:%.*]] = icmp eq i32 [[ARG]], -2147483648
 ; CHECK-NEXT:    [[NEGATED:%.*]] = sub nsw i32 0, [[ARG]]
 ; CHECK-NEXT:    [[ABS:%.*]] = select i1 [[IS_INT_MIN]], i32 2147483647, i32 [[NEGATED]]
-; CHECK-NEXT:    br label [[END]]
-; CHECK:       end:
-; CHECK-NEXT:    [[TMP6:%.*]] = phi i32 [ [[ARG]], [[BEGIN:%.*]] ], [ [[ABS]], [[NEGATIVE]] ]
+; CHECK-NEXT:    [[TMP6:%.*]] = select i1 [[IS_POSITIVE]], i32 [[ARG]], i32 [[ABS]]
 ; CHECK-NEXT:    ret i32 [[TMP6]]
 ;
 begin:
diff --git a/test/Transforms/SimplifyCFG/safe-low-bit-extract.ll b/test/Transforms/SimplifyCFG/safe-low-bit-extract.ll
index 354cfff7ba7..19a6313aa1d 100644
--- a/test/Transforms/SimplifyCFG/safe-low-bit-extract.ll
+++ b/test/Transforms/SimplifyCFG/safe-low-bit-extract.ll
@@ -9,14 +9,10 @@ define i32 @extract_low_bits(i32 %input, i32 %nbits) {
 ; CHECK-LABEL: @extract_low_bits(
 ; CHECK-NEXT:  begin:
 ; CHECK-NEXT:    [[SHOULD_MASK:%.*]] = icmp ult i32 [[NBITS:%.*]], 32
-; CHECK-NEXT:    br i1 [[SHOULD_MASK]], label [[PERFORM_MASKING:%.*]], label [[END:%.*]]
-; CHECK:       perform_masking:
 ; CHECK-NEXT:    [[MASK_NOT:%.*]] = shl nsw i32 -1, [[NBITS]]
 ; CHECK-NEXT:    [[MASK:%.*]] = xor i32 [[MASK_NOT]], -1
 ; CHECK-NEXT:    [[MASKED:%.*]] = and i32 [[MASK]], [[INPUT:%.*]]
-; CHECK-NEXT:    br label [[END]]
-; CHECK:       end:
-; CHECK-NEXT:    [[RES:%.*]] = phi i32 [ [[MASKED]], [[PERFORM_MASKING]] ], [ [[INPUT]], [[BEGIN:%.*]] ]
+; CHECK-NEXT:    [[RES:%.*]] = select i1 [[SHOULD_MASK]], i32 [[MASKED]], i32 [[INPUT]]
 ; CHECK-NEXT:    ret i32 [[RES]]
 ;
 begin:
diff --git a/test/Transforms/SimplifyCFG/signbit-like-value-extension.ll b/test/Transforms/SimplifyCFG/signbit-like-value-extension.ll
index ea013ef9e6c..e955e0679dc 100644
--- a/test/Transforms/SimplifyCFG/signbit-like-value-extension.ll
+++ b/test/Transforms/SimplifyCFG/signbit-like-value-extension.ll
@@ -11,14 +11,10 @@ define i32 @extend_value(i32 %storage, i32 %nbits) {
 ; CHECK-NEXT:    [[SKIPNBITS:%.*]] = sub i32 32, [[NBITS:%.*]]
 ; CHECK-NEXT:    [[VALUE:%.*]] = lshr i32 [[STORAGE:%.*]], [[SKIPNBITS]]
 ; CHECK-NEXT:    [[SHOULDEXTEND:%.*]] = icmp sgt i32 [[STORAGE]], -1
-; CHECK-NEXT:    br i1 [[SHOULDEXTEND]], label [[EXTEND:%.*]], label [[END:%.*]]
-; CHECK:       extend:
 ; CHECK-NEXT:    [[HIGHBITMASK:%.*]] = shl nsw i32 -1, [[NBITS]]
 ; CHECK-NEXT:    [[HIGHBITMASKPLUSONE:%.*]] = add nsw i32 [[HIGHBITMASK]], 1
 ; CHECK-NEXT:    [[EXTENDED:%.*]] = add i32 [[HIGHBITMASKPLUSONE]], [[VALUE]]
-; CHECK-NEXT:    br label [[END]]
-; CHECK:       end:
-; CHECK-NEXT:    [[RES:%.*]] = phi i32 [ [[EXTENDED]], [[EXTEND]] ], [ [[VALUE]], [[BB:%.*]] ]
+; CHECK-NEXT:    [[RES:%.*]] = select i1 [[SHOULDEXTEND]], i32 [[EXTENDED]], i32 [[VALUE]]
 ; CHECK-NEXT:    ret i32 [[RES]]
 ;
 bb:
diff --git a/test/Transforms/SimplifyCFG/speculate-math.ll b/test/Transforms/SimplifyCFG/speculate-math.ll
index 9edffafa236..e3fe5ed2fda 100644
--- a/test/Transforms/SimplifyCFG/speculate-math.ll
+++ b/test/Transforms/SimplifyCFG/speculate-math.ll
@@ -12,23 +12,12 @@ declare float @llvm.minimum.f32(float, float) nounwind readonly
 declare float @llvm.maximum.f32(float, float) nounwind readonly
 
 define double @fdiv_test(double %a, double %b) {
-; EXPENSIVE-LABEL: @fdiv_test(
-; EXPENSIVE-NEXT:  entry:
-; EXPENSIVE-NEXT:    [[CMP:%.*]] = fcmp ogt double [[A:%.*]], 0.000000e+00
-; EXPENSIVE-NEXT:    [[DIV:%.*]] = fdiv double [[B:%.*]], [[A]]
-; EXPENSIVE-NEXT:    [[COND:%.*]] = select i1 [[CMP]], double [[DIV]], double 0.000000e+00
-; EXPENSIVE-NEXT:    ret double [[COND]]
-;
-; CHEAP-LABEL: @fdiv_test(
-; CHEAP-NEXT:  entry:
-; CHEAP-NEXT:    [[CMP:%.*]] = fcmp ogt double [[A:%.*]], 0.000000e+00
-; CHEAP-NEXT:    br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_END:%.*]]
-; CHEAP:       cond.true:
-; CHEAP-NEXT:    [[DIV:%.*]] = fdiv double [[B:%.*]], [[A]]
-; CHEAP-NEXT:    br label [[COND_END]]
-; CHEAP:       cond.end:
-; CHEAP-NEXT:    [[COND:%.*]] = phi double [ [[DIV]], [[COND_TRUE]] ], [ 0.000000e+00, [[ENTRY:%.*]] ]
-; CHEAP-NEXT:    ret double [[COND]]
+; ALL-LABEL: @fdiv_test(
+; ALL-NEXT:  entry:
+; ALL-NEXT:    [[CMP:%.*]] = fcmp ogt double [[A:%.*]], 0.000000e+00
+; ALL-NEXT:    [[DIV:%.*]] = fdiv double [[B:%.*]], [[A]]
+; ALL-NEXT:    [[COND:%.*]] = select i1 [[CMP]], double [[DIV]], double 0.000000e+00
+; ALL-NEXT:    ret double [[COND]]
 ;
 entry:
   %cmp = fcmp ogt double %a, 0.0
-- 
2.50.1