From d1f4a55c8c94c757e0335683f93c08f7c8fe0568 Mon Sep 17 00:00:00 2001 From: Roman Lebedev Date: Mon, 16 Sep 2019 16:18:24 +0000 Subject: [PATCH] [SimplifyCFG] FoldTwoEntryPHINode(): consider *total* speculation cost, not per-BB cost MIME-Version: 1.0 Content-Type: text/plain; charset=utf8 Content-Transfer-Encoding: 8bit Summary: Previously, if the threshold was 2, we were willing to speculatively execute 2 cheap instructions in both basic blocks (thus we were willing to speculatively execute cost = 4), but weren't willing to speculate when one BB had 3 instructions and the other one had no instructions, even though that would have a total cost of 3. This looks inconsistent to me. I don't think `cmov`-like instructions will start executing until both of its inputs are available: https://godbolt.org/z/zgHePf So I don't see why the existing behavior is the correct one. Also, let's add its own `cl::opt` for this threshold, with default=4, so it is not stricter than the previous threshold: it will allow folding when there are 2 BBs each with cost=2. And since the logic has changed, it will also allow folding when one BB has cost=3 and the other cost=1, or there is only one BB with cost=4. This is an alternative solution to D65148: This fix is mainly motivated by the `signbit-like-value-extension.ll` test. That pattern comes up in JPEG decoding, see e.g. `Figure F.12 – Extending the sign bit of a decoded value in V` of `ITU T.81` (JPEG specification). That branch is not predictable, and it is within the innermost loop, so the fact that that pattern ends up being stuck with a branch instead of `select` (i.e. `CMOV` for x86) is unlikely to be beneficial. 
This has great results on the final assembly (vanilla test-suite + RawSpeed): (metric pass - D67240) | metric | old | new | delta | % | | x86-mi-counting.NumMachineFunctions | 37720 | 37721 | 1 | 0.00% | | x86-mi-counting.NumMachineBasicBlocks | 773545 | 771181 | -2364 | -0.31% | | x86-mi-counting.NumMachineInstructions | 7488843 | 7486442 | -2401 | -0.03% | | x86-mi-counting.NumUncondBR | 135770 | 135543 | -227 | -0.17% | | x86-mi-counting.NumCondBR | 423753 | 422187 | -1566 | -0.37% | | x86-mi-counting.NumCMOV | 24815 | 25731 | 916 | 3.69% | | x86-mi-counting.NumVecBlend | 17 | 17 | 0 | 0.00% | We significantly decrease basic block count, notably decrease instruction count, significantly decrease branch count and very significantly increase `cmov` count. Performance-wise, unsurprisingly, this has great effect on target RawSpeed benchmark. I'm seeing 5 **major** improvements: ``` Benchmark Time CPU Time Old Time New CPU Old CPU New ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- Samsung/NX3000/_3184416.SRW/threads:8/process_time/real_time_pvalue 0.0000 0.0000 U Test, Repetitions: 49 vs 49 Samsung/NX3000/_3184416.SRW/threads:8/process_time/real_time_mean -0.3064 -0.3064 226.9913 157.4452 226.9800 157.4384 Samsung/NX3000/_3184416.SRW/threads:8/process_time/real_time_median -0.3057 -0.3057 226.8407 157.4926 226.8282 157.4828 Samsung/NX3000/_3184416.SRW/threads:8/process_time/real_time_stddev -0.4985 -0.4954 0.3051 0.1530 0.3040 0.1534 Kodak/DCS760C/86L57188.DCR/threads:8/process_time/real_time_pvalue 0.0000 0.0000 U Test, Repetitions: 49 vs 49 Kodak/DCS760C/86L57188.DCR/threads:8/process_time/real_time_mean -0.1747 -0.1747 80.4787 66.4227 80.4771 66.4146 Kodak/DCS760C/86L57188.DCR/threads:8/process_time/real_time_median -0.1742 -0.1743 80.4686 66.4542 80.4690 66.4436 Kodak/DCS760C/86L57188.DCR/threads:8/process_time/real_time_stddev 
+0.6089 +0.5797 0.0670 0.1078 0.0673 0.1062 Sony/DSLR-A230/DSC08026.ARW/threads:8/process_time/real_time_pvalue 0.0000 0.0000 U Test, Repetitions: 49 vs 49 Sony/DSLR-A230/DSC08026.ARW/threads:8/process_time/real_time_mean -0.1598 -0.1598 171.6996 144.2575 171.6915 144.2538 Sony/DSLR-A230/DSC08026.ARW/threads:8/process_time/real_time_median -0.1598 -0.1597 171.7109 144.2755 171.7018 144.2766 Sony/DSLR-A230/DSC08026.ARW/threads:8/process_time/real_time_stddev +0.4024 +0.3850 0.0847 0.1187 0.0848 0.1175 Canon/EOS 77D/IMG_4049.CR2/threads:8/process_time/real_time_pvalue 0.0000 0.0000 U Test, Repetitions: 49 vs 49 Canon/EOS 77D/IMG_4049.CR2/threads:8/process_time/real_time_mean -0.0550 -0.0551 280.3046 264.8800 280.3017 264.8559 Canon/EOS 77D/IMG_4049.CR2/threads:8/process_time/real_time_median -0.0554 -0.0554 280.2628 264.7360 280.2574 264.7297 Canon/EOS 77D/IMG_4049.CR2/threads:8/process_time/real_time_stddev +0.7005 +0.7041 0.2779 0.4725 0.2775 0.4729 Canon/EOS 5DS/2K4A9929.CR2/threads:8/process_time/real_time_pvalue 0.0000 0.0000 U Test, Repetitions: 49 vs 49 Canon/EOS 5DS/2K4A9929.CR2/threads:8/process_time/real_time_mean -0.0354 -0.0355 316.7396 305.5208 316.7342 305.4890 Canon/EOS 5DS/2K4A9929.CR2/threads:8/process_time/real_time_median -0.0354 -0.0356 316.6969 305.4798 316.6917 305.4324 Canon/EOS 5DS/2K4A9929.CR2/threads:8/process_time/real_time_stddev +0.0493 +0.0330 0.3562 0.3737 0.3563 0.3681 ``` That being said, it's always best-effort, so there will likely be cases where this worsens things. 
Reviewers: efriedma, craig.topper, dmgreen, jmolloy, fhahn, Carrot, hfinkel, chandlerc Reviewed By: jmolloy Subscribers: xbolva00, hiraditya, llvm-commits Tags: #llvm Differential Revision: https://reviews.llvm.org/D67318 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@372009 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Transforms/Utils/SimplifyCFG.cpp | 27 +- .../IndVarSimplify/loop_evaluate_1.ll | 6 +- test/Transforms/PGOProfile/chr.ll | 28 +- test/Transforms/SimplifyCFG/PhiEliminate3.ll | 64 +--- .../Transforms/SimplifyCFG/SpeculativeExec.ll | 6 +- .../SimplifyCFG/X86/speculate-cttz-ctlz.ll | 304 ++++-------------- .../SimplifyCFG/X86/switch_to_lookup_table.ll | 6 +- test/Transforms/SimplifyCFG/safe-abs.ll | 6 +- .../SimplifyCFG/safe-low-bit-extract.ll | 6 +- .../signbit-like-value-extension.ll | 6 +- test/Transforms/SimplifyCFG/speculate-math.ll | 23 +- 11 files changed, 109 insertions(+), 373 deletions(-) diff --git a/lib/Transforms/Utils/SimplifyCFG.cpp b/lib/Transforms/Utils/SimplifyCFG.cpp index 2e0a07f105d..b7b644c981c 100644 --- a/lib/Transforms/Utils/SimplifyCFG.cpp +++ b/lib/Transforms/Utils/SimplifyCFG.cpp @@ -94,6 +94,12 @@ static cl::opt PHINodeFoldingThreshold( cl::desc( "Control the amount of phi node folding to perform (default = 2)")); +static cl::opt TwoEntryPHINodeFoldingThreshold( + "two-entry-phi-node-folding-threshold", cl::Hidden, cl::init(4), + cl::desc("Control the maximal total instruction cost that we are willing " + "to speculatively execute to fold a 2-entry PHI node into a " + "select (default = 4)")); + static cl::opt DupRet( "simplifycfg-dup-ret", cl::Hidden, cl::init(false), cl::desc("Duplicate return instructions into unconditional branches")); @@ -332,7 +338,7 @@ static unsigned ComputeSpeculationCost(const User *I, /// CostRemaining, false is returned and CostRemaining is undefined. 
static bool DominatesMergePoint(Value *V, BasicBlock *BB, SmallPtrSetImpl &AggressiveInsts, - unsigned &CostRemaining, + int &BudgetRemaining, const TargetTransformInfo &TTI, unsigned Depth = 0) { // It is possible to hit a zero-cost cycle (phi/gep instructions for example), @@ -375,7 +381,7 @@ static bool DominatesMergePoint(Value *V, BasicBlock *BB, if (!isSafeToSpeculativelyExecute(I)) return false; - unsigned Cost = ComputeSpeculationCost(I, TTI); + BudgetRemaining -= ComputeSpeculationCost(I, TTI); // Allow exactly one instruction to be speculated regardless of its cost // (as long as it is safe to do so). @@ -383,17 +389,14 @@ static bool DominatesMergePoint(Value *V, BasicBlock *BB, // or other expensive operation. The speculation of an expensive instruction // is expected to be undone in CodeGenPrepare if the speculation has not // enabled further IR optimizations. - if (Cost > CostRemaining && + if (BudgetRemaining < 0 && (!SpeculateOneExpensiveInst || !AggressiveInsts.empty() || Depth > 0)) return false; - // Avoid unsigned wrap. - CostRemaining = (Cost > CostRemaining) ? 0 : CostRemaining - Cost; - // Okay, we can only really hoist these out if their operands do // not take us over the cost threshold. for (User::op_iterator i = I->op_begin(), e = I->op_end(); i != e; ++i) - if (!DominatesMergePoint(*i, BB, AggressiveInsts, CostRemaining, TTI, + if (!DominatesMergePoint(*i, BB, AggressiveInsts, BudgetRemaining, TTI, Depth + 1)) return false; // Okay, it's safe to do this! Remember this instruction. @@ -2322,10 +2325,8 @@ static bool FoldTwoEntryPHINode(PHINode *PN, const TargetTransformInfo &TTI, // instructions. While we are at it, keep track of the instructions // that need to be moved to the dominating block. 
SmallPtrSet AggressiveInsts; - unsigned MaxCostVal0 = PHINodeFoldingThreshold, - MaxCostVal1 = PHINodeFoldingThreshold; - MaxCostVal0 *= TargetTransformInfo::TCC_Basic; - MaxCostVal1 *= TargetTransformInfo::TCC_Basic; + int BudgetRemaining = + TwoEntryPHINodeFoldingThreshold * TargetTransformInfo::TCC_Basic; for (BasicBlock::iterator II = BB->begin(); isa(II);) { PHINode *PN = cast(II++); @@ -2336,9 +2337,9 @@ static bool FoldTwoEntryPHINode(PHINode *PN, const TargetTransformInfo &TTI, } if (!DominatesMergePoint(PN->getIncomingValue(0), BB, AggressiveInsts, - MaxCostVal0, TTI) || + BudgetRemaining, TTI) || !DominatesMergePoint(PN->getIncomingValue(1), BB, AggressiveInsts, - MaxCostVal1, TTI)) + BudgetRemaining, TTI)) return false; } diff --git a/test/Transforms/IndVarSimplify/loop_evaluate_1.ll b/test/Transforms/IndVarSimplify/loop_evaluate_1.ll index a1e1d013124..6edacc17518 100644 --- a/test/Transforms/IndVarSimplify/loop_evaluate_1.ll +++ b/test/Transforms/IndVarSimplify/loop_evaluate_1.ll @@ -28,14 +28,10 @@ define i32 @test2(i32 %arg) { ; CHECK-LABEL: @test2( ; CHECK-NEXT: bb: ; CHECK-NEXT: [[TMP:%.*]] = icmp ugt i32 [[ARG:%.*]], 10 -; CHECK-NEXT: br i1 [[TMP]], label [[BB1_PREHEADER:%.*]], label [[BB7:%.*]] -; CHECK: bb1.preheader: ; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[ARG]], -11 ; CHECK-NEXT: [[TMP1:%.*]] = lshr i32 [[TMP0]], 1 ; CHECK-NEXT: [[TMP2:%.*]] = add nuw i32 [[TMP1]], 1 -; CHECK-NEXT: br label [[BB7]] -; CHECK: bb7: -; CHECK-NEXT: [[TMP8:%.*]] = phi i32 [ 0, [[BB:%.*]] ], [ [[TMP2]], [[BB1_PREHEADER]] ] +; CHECK-NEXT: [[TMP8:%.*]] = select i1 [[TMP]], i32 [[TMP2]], i32 0 ; CHECK-NEXT: ret i32 [[TMP8]] ; bb: diff --git a/test/Transforms/PGOProfile/chr.ll b/test/Transforms/PGOProfile/chr.ll index 838b7f6af82..0221a4fd000 100644 --- a/test/Transforms/PGOProfile/chr.ll +++ b/test/Transforms/PGOProfile/chr.ll @@ -468,15 +468,14 @@ define i32 @test_chr_5(i32* %i, i32 %sum0) !prof !14 { ; CHECK-NEXT: [[SUM2_NONCHR:%.*]] = select i1 [[TMP11]], i32 
[[SUM1_NONCHR]], i32 [[TMP12]], !prof !16 ; CHECK-NEXT: [[TMP13:%.*]] = and i32 [[TMP0]], 4 ; CHECK-NEXT: [[TMP14:%.*]] = icmp eq i32 [[TMP13]], 0 -; CHECK-NEXT: br i1 [[TMP14]], label [[BB3]], label [[BB1_NONCHR:%.*]], !prof !16 -; CHECK: bb1.nonchr: ; CHECK-NEXT: [[TMP15:%.*]] = and i32 [[TMP0]], 8 ; CHECK-NEXT: [[TMP16:%.*]] = icmp eq i32 [[TMP15]], 0 -; CHECK-NEXT: [[SUM4_NONCHR_V:%.*]] = select i1 [[TMP16]], i32 44, i32 88, !prof !16 +; CHECK-NEXT: [[SUM4_NONCHR_V:%.*]] = select i1 [[TMP16]], i32 44, i32 88 ; CHECK-NEXT: [[SUM4_NONCHR:%.*]] = add i32 [[SUM2_NONCHR]], [[SUM4_NONCHR_V]] +; CHECK-NEXT: [[SUM5_NONCHR:%.*]] = select i1 [[TMP14]], i32 [[SUM2_NONCHR]], i32 [[SUM4_NONCHR]], !prof !16 ; CHECK-NEXT: br label [[BB3]] ; CHECK: bb3: -; CHECK-NEXT: [[SUM6:%.*]] = phi i32 [ [[TMP4]], [[BB0]] ], [ [[SUM0]], [[ENTRY_SPLIT_NONCHR]] ], [ [[SUM2_NONCHR]], [[BB0_NONCHR]] ], [ [[SUM4_NONCHR]], [[BB1_NONCHR]] ] +; CHECK-NEXT: [[SUM6:%.*]] = phi i32 [ [[TMP4]], [[BB0]] ], [ [[SUM0]], [[ENTRY_SPLIT_NONCHR]] ], [ [[SUM5_NONCHR]], [[BB0_NONCHR]] ] ; CHECK-NEXT: ret i32 [[SUM6]] ; entry: @@ -569,15 +568,14 @@ define i32 @test_chr_5_1(i32* %i, i32 %sum0) !prof !14 { ; CHECK-NEXT: [[SUM2_NONCHR:%.*]] = select i1 [[TMP14]], i32 [[SUM1_NONCHR]], i32 [[TMP15]], !prof !16 ; CHECK-NEXT: [[TMP16:%.*]] = and i32 [[SUM0]], 4 ; CHECK-NEXT: [[TMP17:%.*]] = icmp eq i32 [[TMP16]], 0 -; CHECK-NEXT: br i1 [[TMP17]], label [[BB3]], label [[BB1_NONCHR:%.*]], !prof !16 -; CHECK: bb1.nonchr: ; CHECK-NEXT: [[TMP18:%.*]] = and i32 [[TMP0]], 8 ; CHECK-NEXT: [[TMP19:%.*]] = icmp eq i32 [[TMP18]], 0 -; CHECK-NEXT: [[SUM4_NONCHR_V:%.*]] = select i1 [[TMP19]], i32 44, i32 88, !prof !16 +; CHECK-NEXT: [[SUM4_NONCHR_V:%.*]] = select i1 [[TMP19]], i32 44, i32 88 ; CHECK-NEXT: [[SUM4_NONCHR:%.*]] = add i32 [[SUM2_NONCHR]], [[SUM4_NONCHR_V]] +; CHECK-NEXT: [[SUM5_NONCHR:%.*]] = select i1 [[TMP17]], i32 [[SUM2_NONCHR]], i32 [[SUM4_NONCHR]], !prof !16 ; CHECK-NEXT: br label [[BB3]] ; CHECK: bb3: -; 
CHECK-NEXT: [[SUM6:%.*]] = phi i32 [ [[TMP7]], [[BB0]] ], [ [[SUM0]], [[ENTRY_SPLIT_NONCHR]] ], [ [[SUM2_NONCHR]], [[BB0_NONCHR]] ], [ [[SUM4_NONCHR]], [[BB1_NONCHR]] ] +; CHECK-NEXT: [[SUM6:%.*]] = phi i32 [ [[TMP7]], [[BB0]] ], [ [[SUM0]], [[ENTRY_SPLIT_NONCHR]] ], [ [[SUM5_NONCHR]], [[BB0_NONCHR]] ] ; CHECK-NEXT: ret i32 [[SUM6]] ; entry: @@ -667,15 +665,14 @@ define i32 @test_chr_6(i32* %i, i32* %j, i32 %sum0) !prof !14 { ; CHECK-NEXT: [[SUM2_NONCHR:%.*]] = select i1 [[V4_NONCHR]], i32 [[SUM0]], i32 [[V8_NONCHR]], !prof !16 ; CHECK-NEXT: [[V9_NONCHR:%.*]] = and i32 [[J0]], 4 ; CHECK-NEXT: [[V10_NONCHR:%.*]] = icmp eq i32 [[V9_NONCHR]], 0 -; CHECK-NEXT: br i1 [[V10_NONCHR]], label [[BB3]], label [[BB1_NONCHR:%.*]], !prof !16 -; CHECK: bb1.nonchr: ; CHECK-NEXT: [[V11_NONCHR:%.*]] = and i32 [[I0]], 8 ; CHECK-NEXT: [[V12_NONCHR:%.*]] = icmp eq i32 [[V11_NONCHR]], 0 -; CHECK-NEXT: [[SUM4_NONCHR_V:%.*]] = select i1 [[V12_NONCHR]], i32 44, i32 88, !prof !16 +; CHECK-NEXT: [[SUM4_NONCHR_V:%.*]] = select i1 [[V12_NONCHR]], i32 44, i32 88 ; CHECK-NEXT: [[SUM4_NONCHR:%.*]] = add i32 [[SUM2_NONCHR]], [[SUM4_NONCHR_V]] +; CHECK-NEXT: [[SUM5_NONCHR:%.*]] = select i1 [[V10_NONCHR]], i32 [[SUM2_NONCHR]], i32 [[SUM4_NONCHR]], !prof !16 ; CHECK-NEXT: br label [[BB3]] ; CHECK: bb3: -; CHECK-NEXT: [[SUM6:%.*]] = phi i32 [ [[V13]], [[BB0]] ], [ [[SUM0]], [[ENTRY_SPLIT_NONCHR]] ], [ [[SUM2_NONCHR]], [[BB0_NONCHR]] ], [ [[SUM4_NONCHR]], [[BB1_NONCHR]] ] +; CHECK-NEXT: [[SUM6:%.*]] = phi i32 [ [[V13]], [[BB0]] ], [ [[SUM0]], [[ENTRY_SPLIT_NONCHR]] ], [ [[SUM5_NONCHR]], [[BB0_NONCHR]] ] ; CHECK-NEXT: ret i32 [[SUM6]] ; entry: @@ -1754,15 +1751,14 @@ define i32 @test_chr_19(i32* %i, i32 %sum0) !prof !14 { ; CHECK-NEXT: [[TMP8:%.*]] = icmp eq i32 [[TMP7]], 0 ; CHECK-NEXT: [[TMP9:%.*]] = add i32 [[SUM0]], 85 ; CHECK-NEXT: [[SUM2_NONCHR:%.*]] = select i1 [[TMP8]], i32 [[SUM0]], i32 [[TMP9]], !prof !16 -; CHECK-NEXT: br i1 [[TMP8]], label [[BB3]], label [[BB1_NONCHR:%.*]], !prof !16 -; 
CHECK: bb1.nonchr: ; CHECK-NEXT: [[TMP10:%.*]] = and i32 [[TMP0]], 8 ; CHECK-NEXT: [[TMP11:%.*]] = icmp eq i32 [[TMP10]], 0 -; CHECK-NEXT: [[SUM4_NONCHR_V:%.*]] = select i1 [[TMP11]], i32 44, i32 88, !prof !16 +; CHECK-NEXT: [[SUM4_NONCHR_V:%.*]] = select i1 [[TMP11]], i32 44, i32 88 ; CHECK-NEXT: [[SUM4_NONCHR:%.*]] = add i32 [[SUM2_NONCHR]], [[SUM4_NONCHR_V]] +; CHECK-NEXT: [[SUM5_NONCHR:%.*]] = select i1 [[TMP8]], i32 [[SUM2_NONCHR]], i32 [[SUM4_NONCHR]], !prof !16 ; CHECK-NEXT: br label [[BB3]] ; CHECK: bb3: -; CHECK-NEXT: [[SUM6:%.*]] = phi i32 [ [[TMP4]], [[BB0]] ], [ [[SUM0]], [[ENTRY_SPLIT_NONCHR]] ], [ [[SUM2_NONCHR]], [[BB0_NONCHR]] ], [ [[SUM4_NONCHR]], [[BB1_NONCHR]] ] +; CHECK-NEXT: [[SUM6:%.*]] = phi i32 [ [[TMP4]], [[BB0]] ], [ [[SUM0]], [[ENTRY_SPLIT_NONCHR]] ], [ [[SUM5_NONCHR]], [[BB0_NONCHR]] ] ; CHECK-NEXT: ret i32 [[SUM6]] ; entry: diff --git a/test/Transforms/SimplifyCFG/PhiEliminate3.ll b/test/Transforms/SimplifyCFG/PhiEliminate3.ll index 5f498d62d7e..4d7435ce3c2 100644 --- a/test/Transforms/SimplifyCFG/PhiEliminate3.ll +++ b/test/Transforms/SimplifyCFG/PhiEliminate3.ll @@ -7,55 +7,21 @@ ; with various folding thresholds define i32 @test(i1 %a, i1 %b, i32 %i, i32 %j, i32 %k) { -; CHECK-ONE-LABEL: @test( -; CHECK-ONE-NEXT: entry: -; CHECK-ONE-NEXT: br i1 [[A:%.*]], label [[M:%.*]], label [[O:%.*]] -; CHECK-ONE: O: -; CHECK-ONE-NEXT: br i1 [[B:%.*]], label [[P:%.*]], label [[Q:%.*]] -; CHECK-ONE: P: -; CHECK-ONE-NEXT: [[IAJ:%.*]] = add i32 [[I:%.*]], [[J:%.*]] -; CHECK-ONE-NEXT: [[IAJAK:%.*]] = add i32 [[IAJ]], [[K:%.*]] -; CHECK-ONE-NEXT: br label [[N:%.*]] -; CHECK-ONE: Q: -; CHECK-ONE-NEXT: [[IXJ:%.*]] = xor i32 [[I]], [[J]] -; CHECK-ONE-NEXT: [[IXJXK:%.*]] = xor i32 [[IXJ]], [[K]] -; CHECK-ONE-NEXT: br label [[N]] -; CHECK-ONE: N: -; CHECK-ONE-NEXT: [[WP:%.*]] = phi i32 [ [[IAJAK]], [[P]] ], [ [[IXJXK]], [[Q]] ] -; CHECK-ONE-NEXT: [[WP2:%.*]] = add i32 [[WP]], [[WP]] -; CHECK-ONE-NEXT: br label [[M]] -; CHECK-ONE: M: -; CHECK-ONE-NEXT: 
[[W:%.*]] = phi i32 [ [[WP2]], [[N]] ], [ 2, [[ENTRY:%.*]] ] -; CHECK-ONE-NEXT: [[R:%.*]] = add i32 [[W]], 1 -; CHECK-ONE-NEXT: ret i32 [[R]] -; -; CHECK-TWO-LABEL: @test( -; CHECK-TWO-NEXT: entry: -; CHECK-TWO-NEXT: br i1 [[A:%.*]], label [[M:%.*]], label [[O:%.*]] -; CHECK-TWO: O: -; CHECK-TWO-NEXT: [[IAJ:%.*]] = add i32 [[I:%.*]], [[J:%.*]] -; CHECK-TWO-NEXT: [[IAJAK:%.*]] = add i32 [[IAJ]], [[K:%.*]] -; CHECK-TWO-NEXT: [[IXJ:%.*]] = xor i32 [[I]], [[J]] -; CHECK-TWO-NEXT: [[IXJXK:%.*]] = xor i32 [[IXJ]], [[K]] -; CHECK-TWO-NEXT: [[WP:%.*]] = select i1 [[B:%.*]], i32 [[IAJAK]], i32 [[IXJXK]] -; CHECK-TWO-NEXT: [[WP2:%.*]] = add i32 [[WP]], [[WP]] -; CHECK-TWO-NEXT: br label [[M]] -; CHECK-TWO: M: -; CHECK-TWO-NEXT: [[W:%.*]] = phi i32 [ [[WP2]], [[O]] ], [ 2, [[ENTRY:%.*]] ] -; CHECK-TWO-NEXT: [[R:%.*]] = add i32 [[W]], 1 -; CHECK-TWO-NEXT: ret i32 [[R]] -; -; CHECK-SEVEN-LABEL: @test( -; CHECK-SEVEN-NEXT: entry: -; CHECK-SEVEN-NEXT: [[IAJ:%.*]] = add i32 [[I:%.*]], [[J:%.*]] -; CHECK-SEVEN-NEXT: [[IAJAK:%.*]] = add i32 [[IAJ]], [[K:%.*]] -; CHECK-SEVEN-NEXT: [[IXJ:%.*]] = xor i32 [[I]], [[J]] -; CHECK-SEVEN-NEXT: [[IXJXK:%.*]] = xor i32 [[IXJ]], [[K]] -; CHECK-SEVEN-NEXT: [[WP:%.*]] = select i1 [[B:%.*]], i32 [[IAJAK]], i32 [[IXJXK]] -; CHECK-SEVEN-NEXT: [[WP2:%.*]] = add i32 [[WP]], [[WP]] -; CHECK-SEVEN-NEXT: [[W:%.*]] = select i1 [[A:%.*]], i32 2, i32 [[WP2]] -; CHECK-SEVEN-NEXT: [[R:%.*]] = add i32 [[W]], 1 -; CHECK-SEVEN-NEXT: ret i32 [[R]] +; ALL-LABEL: @test( +; ALL-NEXT: entry: +; ALL-NEXT: br i1 [[A:%.*]], label [[M:%.*]], label [[O:%.*]] +; ALL: O: +; ALL-NEXT: [[IAJ:%.*]] = add i32 [[I:%.*]], [[J:%.*]] +; ALL-NEXT: [[IAJAK:%.*]] = add i32 [[IAJ]], [[K:%.*]] +; ALL-NEXT: [[IXJ:%.*]] = xor i32 [[I]], [[J]] +; ALL-NEXT: [[IXJXK:%.*]] = xor i32 [[IXJ]], [[K]] +; ALL-NEXT: [[WP:%.*]] = select i1 [[B:%.*]], i32 [[IAJAK]], i32 [[IXJXK]] +; ALL-NEXT: [[WP2:%.*]] = add i32 [[WP]], [[WP]] +; ALL-NEXT: br label [[M]] +; ALL: M: +; ALL-NEXT: [[W:%.*]] = phi i32 
[ [[WP2]], [[O]] ], [ 2, [[ENTRY:%.*]] ] +; ALL-NEXT: [[R:%.*]] = add i32 [[W]], 1 +; ALL-NEXT: ret i32 [[R]] ; entry: br i1 %a, label %M, label %O diff --git a/test/Transforms/SimplifyCFG/SpeculativeExec.ll b/test/Transforms/SimplifyCFG/SpeculativeExec.ll index e9e1d73a7b6..c21edd0c2ad 100644 --- a/test/Transforms/SimplifyCFG/SpeculativeExec.ll +++ b/test/Transforms/SimplifyCFG/SpeculativeExec.ll @@ -8,14 +8,10 @@ define i32 @test1(i32 %a, i32 %b, i32 %c) nounwind { ; CHECK-LABEL: @test1( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[TMP1:%.*]] = icmp eq i32 [[B:%.*]], 0 -; CHECK-NEXT: br i1 [[TMP1]], label [[BB1:%.*]], label [[BB3:%.*]] -; CHECK: bb1: ; CHECK-NEXT: [[TMP2:%.*]] = icmp sgt i32 [[C:%.*]], 1 ; CHECK-NEXT: [[TMP3:%.*]] = add i32 [[A:%.*]], 1 ; CHECK-NEXT: [[SPEC_SELECT:%.*]] = select i1 [[TMP2]], i32 [[TMP3]], i32 [[A]] -; CHECK-NEXT: br label [[BB3]] -; CHECK: bb3: -; CHECK-NEXT: [[TMP4:%.*]] = phi i32 [ [[B]], [[ENTRY:%.*]] ], [ [[SPEC_SELECT]], [[BB1]] ] +; CHECK-NEXT: [[TMP4:%.*]] = select i1 [[TMP1]], i32 [[SPEC_SELECT]], i32 [[B]] ; CHECK-NEXT: [[TMP5:%.*]] = sub i32 [[TMP4]], 1 ; CHECK-NEXT: ret i32 [[TMP5]] ; diff --git a/test/Transforms/SimplifyCFG/X86/speculate-cttz-ctlz.ll b/test/Transforms/SimplifyCFG/X86/speculate-cttz-ctlz.ll index 11ba3984f35..3a2f067a2ee 100644 --- a/test/Transforms/SimplifyCFG/X86/speculate-cttz-ctlz.ll +++ b/test/Transforms/SimplifyCFG/X86/speculate-cttz-ctlz.ll @@ -223,37 +223,13 @@ cond.end: ; preds = %entry, %cond.true ; for the target. 
define i64 @test1e(i32 %x) { -; BMI-LABEL: @test1e( -; BMI-NEXT: entry: -; BMI-NEXT: [[TOBOOL:%.*]] = icmp eq i32 [[X:%.*]], 0 -; BMI-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.cttz.i32(i32 [[X]], i1 true) -; BMI-NEXT: [[PHITMP2:%.*]] = zext i32 [[TMP0]] to i64 -; BMI-NEXT: [[COND:%.*]] = select i1 [[TOBOOL]], i64 32, i64 [[PHITMP2]] -; BMI-NEXT: ret i64 [[COND]] -; -; LZCNT-LABEL: @test1e( -; LZCNT-NEXT: entry: -; LZCNT-NEXT: [[TOBOOL:%.*]] = icmp eq i32 [[X:%.*]], 0 -; LZCNT-NEXT: br i1 [[TOBOOL]], label [[COND_END:%.*]], label [[COND_TRUE:%.*]] -; LZCNT: cond.true: -; LZCNT-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.cttz.i32(i32 [[X]], i1 true) -; LZCNT-NEXT: [[PHITMP2:%.*]] = zext i32 [[TMP0]] to i64 -; LZCNT-NEXT: br label [[COND_END]] -; LZCNT: cond.end: -; LZCNT-NEXT: [[COND:%.*]] = phi i64 [ [[PHITMP2]], [[COND_TRUE]] ], [ 32, [[ENTRY:%.*]] ] -; LZCNT-NEXT: ret i64 [[COND]] -; -; GENERIC-LABEL: @test1e( -; GENERIC-NEXT: entry: -; GENERIC-NEXT: [[TOBOOL:%.*]] = icmp eq i32 [[X:%.*]], 0 -; GENERIC-NEXT: br i1 [[TOBOOL]], label [[COND_END:%.*]], label [[COND_TRUE:%.*]] -; GENERIC: cond.true: -; GENERIC-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.cttz.i32(i32 [[X]], i1 true) -; GENERIC-NEXT: [[PHITMP2:%.*]] = zext i32 [[TMP0]] to i64 -; GENERIC-NEXT: br label [[COND_END]] -; GENERIC: cond.end: -; GENERIC-NEXT: [[COND:%.*]] = phi i64 [ [[PHITMP2]], [[COND_TRUE]] ], [ 32, [[ENTRY:%.*]] ] -; GENERIC-NEXT: ret i64 [[COND]] +; ALL-LABEL: @test1e( +; ALL-NEXT: entry: +; ALL-NEXT: [[TOBOOL:%.*]] = icmp eq i32 [[X:%.*]], 0 +; ALL-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.cttz.i32(i32 [[X]], i1 true) +; ALL-NEXT: [[PHITMP2:%.*]] = zext i32 [[TMP0]] to i64 +; ALL-NEXT: [[COND:%.*]] = select i1 [[TOBOOL]], i64 32, i64 [[PHITMP2]] +; ALL-NEXT: ret i64 [[COND]] ; entry: %tobool = icmp eq i32 %x, 0 @@ -270,37 +246,13 @@ cond.end: ; preds = %entry, %cond.true } define i32 @test2e(i64 %x) { -; BMI-LABEL: @test2e( -; BMI-NEXT: entry: -; BMI-NEXT: [[TOBOOL:%.*]] = icmp eq i64 [[X:%.*]], 
0 -; BMI-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.cttz.i64(i64 [[X]], i1 true) -; BMI-NEXT: [[CAST:%.*]] = trunc i64 [[TMP0]] to i32 -; BMI-NEXT: [[COND:%.*]] = select i1 [[TOBOOL]], i32 64, i32 [[CAST]] -; BMI-NEXT: ret i32 [[COND]] -; -; LZCNT-LABEL: @test2e( -; LZCNT-NEXT: entry: -; LZCNT-NEXT: [[TOBOOL:%.*]] = icmp eq i64 [[X:%.*]], 0 -; LZCNT-NEXT: br i1 [[TOBOOL]], label [[COND_END:%.*]], label [[COND_TRUE:%.*]] -; LZCNT: cond.true: -; LZCNT-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.cttz.i64(i64 [[X]], i1 true) -; LZCNT-NEXT: [[CAST:%.*]] = trunc i64 [[TMP0]] to i32 -; LZCNT-NEXT: br label [[COND_END]] -; LZCNT: cond.end: -; LZCNT-NEXT: [[COND:%.*]] = phi i32 [ [[CAST]], [[COND_TRUE]] ], [ 64, [[ENTRY:%.*]] ] -; LZCNT-NEXT: ret i32 [[COND]] -; -; GENERIC-LABEL: @test2e( -; GENERIC-NEXT: entry: -; GENERIC-NEXT: [[TOBOOL:%.*]] = icmp eq i64 [[X:%.*]], 0 -; GENERIC-NEXT: br i1 [[TOBOOL]], label [[COND_END:%.*]], label [[COND_TRUE:%.*]] -; GENERIC: cond.true: -; GENERIC-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.cttz.i64(i64 [[X]], i1 true) -; GENERIC-NEXT: [[CAST:%.*]] = trunc i64 [[TMP0]] to i32 -; GENERIC-NEXT: br label [[COND_END]] -; GENERIC: cond.end: -; GENERIC-NEXT: [[COND:%.*]] = phi i32 [ [[CAST]], [[COND_TRUE]] ], [ 64, [[ENTRY:%.*]] ] -; GENERIC-NEXT: ret i32 [[COND]] +; ALL-LABEL: @test2e( +; ALL-NEXT: entry: +; ALL-NEXT: [[TOBOOL:%.*]] = icmp eq i64 [[X:%.*]], 0 +; ALL-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.cttz.i64(i64 [[X]], i1 true) +; ALL-NEXT: [[CAST:%.*]] = trunc i64 [[TMP0]] to i32 +; ALL-NEXT: [[COND:%.*]] = select i1 [[TOBOOL]], i32 64, i32 [[CAST]] +; ALL-NEXT: ret i32 [[COND]] ; entry: %tobool = icmp eq i64 %x, 0 @@ -317,37 +269,13 @@ cond.end: ; preds = %entry, %cond.true } define i64 @test3e(i32 %x) { -; BMI-LABEL: @test3e( -; BMI-NEXT: entry: -; BMI-NEXT: [[TOBOOL:%.*]] = icmp eq i32 [[X:%.*]], 0 -; BMI-NEXT: br i1 [[TOBOOL]], label [[COND_END:%.*]], label [[COND_TRUE:%.*]] -; BMI: cond.true: -; BMI-NEXT: [[TMP0:%.*]] = tail call i32 
@llvm.ctlz.i32(i32 [[X]], i1 true) -; BMI-NEXT: [[PHITMP2:%.*]] = zext i32 [[TMP0]] to i64 -; BMI-NEXT: br label [[COND_END]] -; BMI: cond.end: -; BMI-NEXT: [[COND:%.*]] = phi i64 [ [[PHITMP2]], [[COND_TRUE]] ], [ 32, [[ENTRY:%.*]] ] -; BMI-NEXT: ret i64 [[COND]] -; -; LZCNT-LABEL: @test3e( -; LZCNT-NEXT: entry: -; LZCNT-NEXT: [[TOBOOL:%.*]] = icmp eq i32 [[X:%.*]], 0 -; LZCNT-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.ctlz.i32(i32 [[X]], i1 true) -; LZCNT-NEXT: [[PHITMP2:%.*]] = zext i32 [[TMP0]] to i64 -; LZCNT-NEXT: [[COND:%.*]] = select i1 [[TOBOOL]], i64 32, i64 [[PHITMP2]] -; LZCNT-NEXT: ret i64 [[COND]] -; -; GENERIC-LABEL: @test3e( -; GENERIC-NEXT: entry: -; GENERIC-NEXT: [[TOBOOL:%.*]] = icmp eq i32 [[X:%.*]], 0 -; GENERIC-NEXT: br i1 [[TOBOOL]], label [[COND_END:%.*]], label [[COND_TRUE:%.*]] -; GENERIC: cond.true: -; GENERIC-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.ctlz.i32(i32 [[X]], i1 true) -; GENERIC-NEXT: [[PHITMP2:%.*]] = zext i32 [[TMP0]] to i64 -; GENERIC-NEXT: br label [[COND_END]] -; GENERIC: cond.end: -; GENERIC-NEXT: [[COND:%.*]] = phi i64 [ [[PHITMP2]], [[COND_TRUE]] ], [ 32, [[ENTRY:%.*]] ] -; GENERIC-NEXT: ret i64 [[COND]] +; ALL-LABEL: @test3e( +; ALL-NEXT: entry: +; ALL-NEXT: [[TOBOOL:%.*]] = icmp eq i32 [[X:%.*]], 0 +; ALL-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.ctlz.i32(i32 [[X]], i1 true) +; ALL-NEXT: [[PHITMP2:%.*]] = zext i32 [[TMP0]] to i64 +; ALL-NEXT: [[COND:%.*]] = select i1 [[TOBOOL]], i64 32, i64 [[PHITMP2]] +; ALL-NEXT: ret i64 [[COND]] ; entry: %tobool = icmp eq i32 %x, 0 @@ -364,37 +292,13 @@ cond.end: ; preds = %entry, %cond.true } define i32 @test4e(i64 %x) { -; BMI-LABEL: @test4e( -; BMI-NEXT: entry: -; BMI-NEXT: [[TOBOOL:%.*]] = icmp eq i64 [[X:%.*]], 0 -; BMI-NEXT: br i1 [[TOBOOL]], label [[COND_END:%.*]], label [[COND_TRUE:%.*]] -; BMI: cond.true: -; BMI-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.ctlz.i64(i64 [[X]], i1 true) -; BMI-NEXT: [[CAST:%.*]] = trunc i64 [[TMP0]] to i32 -; BMI-NEXT: br label [[COND_END]] -; BMI: 
cond.end: -; BMI-NEXT: [[COND:%.*]] = phi i32 [ [[CAST]], [[COND_TRUE]] ], [ 64, [[ENTRY:%.*]] ] -; BMI-NEXT: ret i32 [[COND]] -; -; LZCNT-LABEL: @test4e( -; LZCNT-NEXT: entry: -; LZCNT-NEXT: [[TOBOOL:%.*]] = icmp eq i64 [[X:%.*]], 0 -; LZCNT-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.ctlz.i64(i64 [[X]], i1 true) -; LZCNT-NEXT: [[CAST:%.*]] = trunc i64 [[TMP0]] to i32 -; LZCNT-NEXT: [[COND:%.*]] = select i1 [[TOBOOL]], i32 64, i32 [[CAST]] -; LZCNT-NEXT: ret i32 [[COND]] -; -; GENERIC-LABEL: @test4e( -; GENERIC-NEXT: entry: -; GENERIC-NEXT: [[TOBOOL:%.*]] = icmp eq i64 [[X:%.*]], 0 -; GENERIC-NEXT: br i1 [[TOBOOL]], label [[COND_END:%.*]], label [[COND_TRUE:%.*]] -; GENERIC: cond.true: -; GENERIC-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.ctlz.i64(i64 [[X]], i1 true) -; GENERIC-NEXT: [[CAST:%.*]] = trunc i64 [[TMP0]] to i32 -; GENERIC-NEXT: br label [[COND_END]] -; GENERIC: cond.end: -; GENERIC-NEXT: [[COND:%.*]] = phi i32 [ [[CAST]], [[COND_TRUE]] ], [ 64, [[ENTRY:%.*]] ] -; GENERIC-NEXT: ret i32 [[COND]] +; ALL-LABEL: @test4e( +; ALL-NEXT: entry: +; ALL-NEXT: [[TOBOOL:%.*]] = icmp eq i64 [[X:%.*]], 0 +; ALL-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.ctlz.i64(i64 [[X]], i1 true) +; ALL-NEXT: [[CAST:%.*]] = trunc i64 [[TMP0]] to i32 +; ALL-NEXT: [[COND:%.*]] = select i1 [[TOBOOL]], i32 64, i32 [[CAST]] +; ALL-NEXT: ret i32 [[COND]] ; entry: %tobool = icmp eq i64 %x, 0 @@ -411,37 +315,13 @@ cond.end: ; preds = %entry, %cond.true } define i16 @test5e(i64 %x) { -; BMI-LABEL: @test5e( -; BMI-NEXT: entry: -; BMI-NEXT: [[TOBOOL:%.*]] = icmp eq i64 [[X:%.*]], 0 -; BMI-NEXT: br i1 [[TOBOOL]], label [[COND_END:%.*]], label [[COND_TRUE:%.*]] -; BMI: cond.true: -; BMI-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.ctlz.i64(i64 [[X]], i1 true) -; BMI-NEXT: [[CAST:%.*]] = trunc i64 [[TMP0]] to i16 -; BMI-NEXT: br label [[COND_END]] -; BMI: cond.end: -; BMI-NEXT: [[COND:%.*]] = phi i16 [ [[CAST]], [[COND_TRUE]] ], [ 64, [[ENTRY:%.*]] ] -; BMI-NEXT: ret i16 [[COND]] -; -; LZCNT-LABEL: 
@test5e( -; LZCNT-NEXT: entry: -; LZCNT-NEXT: [[TOBOOL:%.*]] = icmp eq i64 [[X:%.*]], 0 -; LZCNT-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.ctlz.i64(i64 [[X]], i1 true) -; LZCNT-NEXT: [[CAST:%.*]] = trunc i64 [[TMP0]] to i16 -; LZCNT-NEXT: [[COND:%.*]] = select i1 [[TOBOOL]], i16 64, i16 [[CAST]] -; LZCNT-NEXT: ret i16 [[COND]] -; -; GENERIC-LABEL: @test5e( -; GENERIC-NEXT: entry: -; GENERIC-NEXT: [[TOBOOL:%.*]] = icmp eq i64 [[X:%.*]], 0 -; GENERIC-NEXT: br i1 [[TOBOOL]], label [[COND_END:%.*]], label [[COND_TRUE:%.*]] -; GENERIC: cond.true: -; GENERIC-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.ctlz.i64(i64 [[X]], i1 true) -; GENERIC-NEXT: [[CAST:%.*]] = trunc i64 [[TMP0]] to i16 -; GENERIC-NEXT: br label [[COND_END]] -; GENERIC: cond.end: -; GENERIC-NEXT: [[COND:%.*]] = phi i16 [ [[CAST]], [[COND_TRUE]] ], [ 64, [[ENTRY:%.*]] ] -; GENERIC-NEXT: ret i16 [[COND]] +; ALL-LABEL: @test5e( +; ALL-NEXT: entry: +; ALL-NEXT: [[TOBOOL:%.*]] = icmp eq i64 [[X:%.*]], 0 +; ALL-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.ctlz.i64(i64 [[X]], i1 true) +; ALL-NEXT: [[CAST:%.*]] = trunc i64 [[TMP0]] to i16 +; ALL-NEXT: [[COND:%.*]] = select i1 [[TOBOOL]], i16 64, i16 [[CAST]] +; ALL-NEXT: ret i16 [[COND]] ; entry: %tobool = icmp eq i64 %x, 0 @@ -458,37 +338,13 @@ cond.end: ; preds = %entry, %cond.true } define i16 @test6e(i32 %x) { -; BMI-LABEL: @test6e( -; BMI-NEXT: entry: -; BMI-NEXT: [[TOBOOL:%.*]] = icmp eq i32 [[X:%.*]], 0 -; BMI-NEXT: br i1 [[TOBOOL]], label [[COND_END:%.*]], label [[COND_TRUE:%.*]] -; BMI: cond.true: -; BMI-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.ctlz.i32(i32 [[X]], i1 true) -; BMI-NEXT: [[CAST:%.*]] = trunc i32 [[TMP0]] to i16 -; BMI-NEXT: br label [[COND_END]] -; BMI: cond.end: -; BMI-NEXT: [[COND:%.*]] = phi i16 [ [[CAST]], [[COND_TRUE]] ], [ 32, [[ENTRY:%.*]] ] -; BMI-NEXT: ret i16 [[COND]] -; -; LZCNT-LABEL: @test6e( -; LZCNT-NEXT: entry: -; LZCNT-NEXT: [[TOBOOL:%.*]] = icmp eq i32 [[X:%.*]], 0 -; LZCNT-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.ctlz.i32(i32 
[[X]], i1 true) -; LZCNT-NEXT: [[CAST:%.*]] = trunc i32 [[TMP0]] to i16 -; LZCNT-NEXT: [[COND:%.*]] = select i1 [[TOBOOL]], i16 32, i16 [[CAST]] -; LZCNT-NEXT: ret i16 [[COND]] -; -; GENERIC-LABEL: @test6e( -; GENERIC-NEXT: entry: -; GENERIC-NEXT: [[TOBOOL:%.*]] = icmp eq i32 [[X:%.*]], 0 -; GENERIC-NEXT: br i1 [[TOBOOL]], label [[COND_END:%.*]], label [[COND_TRUE:%.*]] -; GENERIC: cond.true: -; GENERIC-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.ctlz.i32(i32 [[X]], i1 true) -; GENERIC-NEXT: [[CAST:%.*]] = trunc i32 [[TMP0]] to i16 -; GENERIC-NEXT: br label [[COND_END]] -; GENERIC: cond.end: -; GENERIC-NEXT: [[COND:%.*]] = phi i16 [ [[CAST]], [[COND_TRUE]] ], [ 32, [[ENTRY:%.*]] ] -; GENERIC-NEXT: ret i16 [[COND]] +; ALL-LABEL: @test6e( +; ALL-NEXT: entry: +; ALL-NEXT: [[TOBOOL:%.*]] = icmp eq i32 [[X:%.*]], 0 +; ALL-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.ctlz.i32(i32 [[X]], i1 true) +; ALL-NEXT: [[CAST:%.*]] = trunc i32 [[TMP0]] to i16 +; ALL-NEXT: [[COND:%.*]] = select i1 [[TOBOOL]], i16 32, i16 [[CAST]] +; ALL-NEXT: ret i16 [[COND]] ; entry: %tobool = icmp eq i32 %x, 0 @@ -505,37 +361,13 @@ cond.end: ; preds = %entry, %cond.true } define i16 @test7e(i64 %x) { -; BMI-LABEL: @test7e( -; BMI-NEXT: entry: -; BMI-NEXT: [[TOBOOL:%.*]] = icmp eq i64 [[X:%.*]], 0 -; BMI-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.cttz.i64(i64 [[X]], i1 true) -; BMI-NEXT: [[CAST:%.*]] = trunc i64 [[TMP0]] to i16 -; BMI-NEXT: [[COND:%.*]] = select i1 [[TOBOOL]], i16 64, i16 [[CAST]] -; BMI-NEXT: ret i16 [[COND]] -; -; LZCNT-LABEL: @test7e( -; LZCNT-NEXT: entry: -; LZCNT-NEXT: [[TOBOOL:%.*]] = icmp eq i64 [[X:%.*]], 0 -; LZCNT-NEXT: br i1 [[TOBOOL]], label [[COND_END:%.*]], label [[COND_TRUE:%.*]] -; LZCNT: cond.true: -; LZCNT-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.cttz.i64(i64 [[X]], i1 true) -; LZCNT-NEXT: [[CAST:%.*]] = trunc i64 [[TMP0]] to i16 -; LZCNT-NEXT: br label [[COND_END]] -; LZCNT: cond.end: -; LZCNT-NEXT: [[COND:%.*]] = phi i16 [ [[CAST]], [[COND_TRUE]] ], [ 64, [[ENTRY:%.*]] ] 
-; LZCNT-NEXT: ret i16 [[COND]] -; -; GENERIC-LABEL: @test7e( -; GENERIC-NEXT: entry: -; GENERIC-NEXT: [[TOBOOL:%.*]] = icmp eq i64 [[X:%.*]], 0 -; GENERIC-NEXT: br i1 [[TOBOOL]], label [[COND_END:%.*]], label [[COND_TRUE:%.*]] -; GENERIC: cond.true: -; GENERIC-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.cttz.i64(i64 [[X]], i1 true) -; GENERIC-NEXT: [[CAST:%.*]] = trunc i64 [[TMP0]] to i16 -; GENERIC-NEXT: br label [[COND_END]] -; GENERIC: cond.end: -; GENERIC-NEXT: [[COND:%.*]] = phi i16 [ [[CAST]], [[COND_TRUE]] ], [ 64, [[ENTRY:%.*]] ] -; GENERIC-NEXT: ret i16 [[COND]] +; ALL-LABEL: @test7e( +; ALL-NEXT: entry: +; ALL-NEXT: [[TOBOOL:%.*]] = icmp eq i64 [[X:%.*]], 0 +; ALL-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.cttz.i64(i64 [[X]], i1 true) +; ALL-NEXT: [[CAST:%.*]] = trunc i64 [[TMP0]] to i16 +; ALL-NEXT: [[COND:%.*]] = select i1 [[TOBOOL]], i16 64, i16 [[CAST]] +; ALL-NEXT: ret i16 [[COND]] ; entry: %tobool = icmp eq i64 %x, 0 @@ -552,37 +384,13 @@ cond.end: ; preds = %entry, %cond.true } define i16 @test8e(i32 %x) { -; BMI-LABEL: @test8e( -; BMI-NEXT: entry: -; BMI-NEXT: [[TOBOOL:%.*]] = icmp eq i32 [[X:%.*]], 0 -; BMI-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.cttz.i32(i32 [[X]], i1 true) -; BMI-NEXT: [[CAST:%.*]] = trunc i32 [[TMP0]] to i16 -; BMI-NEXT: [[COND:%.*]] = select i1 [[TOBOOL]], i16 32, i16 [[CAST]] -; BMI-NEXT: ret i16 [[COND]] -; -; LZCNT-LABEL: @test8e( -; LZCNT-NEXT: entry: -; LZCNT-NEXT: [[TOBOOL:%.*]] = icmp eq i32 [[X:%.*]], 0 -; LZCNT-NEXT: br i1 [[TOBOOL]], label [[COND_END:%.*]], label [[COND_TRUE:%.*]] -; LZCNT: cond.true: -; LZCNT-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.cttz.i32(i32 [[X]], i1 true) -; LZCNT-NEXT: [[CAST:%.*]] = trunc i32 [[TMP0]] to i16 -; LZCNT-NEXT: br label [[COND_END]] -; LZCNT: cond.end: -; LZCNT-NEXT: [[COND:%.*]] = phi i16 [ [[CAST]], [[COND_TRUE]] ], [ 32, [[ENTRY:%.*]] ] -; LZCNT-NEXT: ret i16 [[COND]] -; -; GENERIC-LABEL: @test8e( -; GENERIC-NEXT: entry: -; GENERIC-NEXT: [[TOBOOL:%.*]] = icmp eq i32 [[X:%.*]], 0 
-; GENERIC-NEXT: br i1 [[TOBOOL]], label [[COND_END:%.*]], label [[COND_TRUE:%.*]] -; GENERIC: cond.true: -; GENERIC-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.cttz.i32(i32 [[X]], i1 true) -; GENERIC-NEXT: [[CAST:%.*]] = trunc i32 [[TMP0]] to i16 -; GENERIC-NEXT: br label [[COND_END]] -; GENERIC: cond.end: -; GENERIC-NEXT: [[COND:%.*]] = phi i16 [ [[CAST]], [[COND_TRUE]] ], [ 32, [[ENTRY:%.*]] ] -; GENERIC-NEXT: ret i16 [[COND]] +; ALL-LABEL: @test8e( +; ALL-NEXT: entry: +; ALL-NEXT: [[TOBOOL:%.*]] = icmp eq i32 [[X:%.*]], 0 +; ALL-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.cttz.i32(i32 [[X]], i1 true) +; ALL-NEXT: [[CAST:%.*]] = trunc i32 [[TMP0]] to i16 +; ALL-NEXT: [[COND:%.*]] = select i1 [[TOBOOL]], i16 32, i16 [[CAST]] +; ALL-NEXT: ret i16 [[COND]] ; entry: %tobool = icmp eq i32 %x, 0 diff --git a/test/Transforms/SimplifyCFG/X86/switch_to_lookup_table.ll b/test/Transforms/SimplifyCFG/X86/switch_to_lookup_table.ll index c77438974b2..597b5b969a7 100644 --- a/test/Transforms/SimplifyCFG/X86/switch_to_lookup_table.ll +++ b/test/Transforms/SimplifyCFG/X86/switch_to_lookup_table.ll @@ -1437,14 +1437,10 @@ define i32 @no_reuse_cmp2(i32 %x, i32 %y) { ; CHECK-LABEL: @no_reuse_cmp2( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[EC:%.*]] = icmp ne i32 [[Y:%.*]], 0 -; CHECK-NEXT: br i1 [[EC]], label [[SWITCH_ENTRY:%.*]], label [[SW_EPILOG:%.*]] -; CHECK: switch.entry: ; CHECK-NEXT: [[TMP0:%.*]] = icmp ult i32 [[X:%.*]], 4 ; CHECK-NEXT: [[SWITCH_OFFSET:%.*]] = add i32 [[X]], 10 ; CHECK-NEXT: [[SPEC_SELECT:%.*]] = select i1 [[TMP0]], i32 [[SWITCH_OFFSET]], i32 0 -; CHECK-NEXT: br label [[SW_EPILOG]] -; CHECK: sw.epilog: -; CHECK-NEXT: [[R_0:%.*]] = phi i32 [ 100, [[ENTRY:%.*]] ], [ [[SPEC_SELECT]], [[SWITCH_ENTRY]] ] +; CHECK-NEXT: [[R_0:%.*]] = select i1 [[EC]], i32 [[SPEC_SELECT]], i32 100 ; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[R_0]], 0 ; CHECK-NEXT: [[DOTR_0:%.*]] = select i1 [[CMP]], i32 100, i32 [[R_0]] ; CHECK-NEXT: ret i32 [[DOTR_0]] diff --git 
a/test/Transforms/SimplifyCFG/safe-abs.ll b/test/Transforms/SimplifyCFG/safe-abs.ll index 550fcefdde3..6d8028f8d94 100644 --- a/test/Transforms/SimplifyCFG/safe-abs.ll +++ b/test/Transforms/SimplifyCFG/safe-abs.ll @@ -8,14 +8,10 @@ define i32 @abs_with_clamp(i32 %arg) { ; CHECK-LABEL: @abs_with_clamp( ; CHECK-NEXT: begin: ; CHECK-NEXT: [[IS_POSITIVE:%.*]] = icmp sgt i32 [[ARG:%.*]], 0 -; CHECK-NEXT: br i1 [[IS_POSITIVE]], label [[END:%.*]], label [[NEGATIVE:%.*]] -; CHECK: negative: ; CHECK-NEXT: [[IS_INT_MIN:%.*]] = icmp eq i32 [[ARG]], -2147483648 ; CHECK-NEXT: [[NEGATED:%.*]] = sub nsw i32 0, [[ARG]] ; CHECK-NEXT: [[ABS:%.*]] = select i1 [[IS_INT_MIN]], i32 2147483647, i32 [[NEGATED]] -; CHECK-NEXT: br label [[END]] -; CHECK: end: -; CHECK-NEXT: [[TMP6:%.*]] = phi i32 [ [[ARG]], [[BEGIN:%.*]] ], [ [[ABS]], [[NEGATIVE]] ] +; CHECK-NEXT: [[TMP6:%.*]] = select i1 [[IS_POSITIVE]], i32 [[ARG]], i32 [[ABS]] ; CHECK-NEXT: ret i32 [[TMP6]] ; begin: diff --git a/test/Transforms/SimplifyCFG/safe-low-bit-extract.ll b/test/Transforms/SimplifyCFG/safe-low-bit-extract.ll index 354cfff7ba7..19a6313aa1d 100644 --- a/test/Transforms/SimplifyCFG/safe-low-bit-extract.ll +++ b/test/Transforms/SimplifyCFG/safe-low-bit-extract.ll @@ -9,14 +9,10 @@ define i32 @extract_low_bits(i32 %input, i32 %nbits) { ; CHECK-LABEL: @extract_low_bits( ; CHECK-NEXT: begin: ; CHECK-NEXT: [[SHOULD_MASK:%.*]] = icmp ult i32 [[NBITS:%.*]], 32 -; CHECK-NEXT: br i1 [[SHOULD_MASK]], label [[PERFORM_MASKING:%.*]], label [[END:%.*]] -; CHECK: perform_masking: ; CHECK-NEXT: [[MASK_NOT:%.*]] = shl nsw i32 -1, [[NBITS]] ; CHECK-NEXT: [[MASK:%.*]] = xor i32 [[MASK_NOT]], -1 ; CHECK-NEXT: [[MASKED:%.*]] = and i32 [[MASK]], [[INPUT:%.*]] -; CHECK-NEXT: br label [[END]] -; CHECK: end: -; CHECK-NEXT: [[RES:%.*]] = phi i32 [ [[MASKED]], [[PERFORM_MASKING]] ], [ [[INPUT]], [[BEGIN:%.*]] ] +; CHECK-NEXT: [[RES:%.*]] = select i1 [[SHOULD_MASK]], i32 [[MASKED]], i32 [[INPUT]] ; CHECK-NEXT: ret i32 [[RES]] ; begin: diff 
--git a/test/Transforms/SimplifyCFG/signbit-like-value-extension.ll b/test/Transforms/SimplifyCFG/signbit-like-value-extension.ll index ea013ef9e6c..e955e0679dc 100644 --- a/test/Transforms/SimplifyCFG/signbit-like-value-extension.ll +++ b/test/Transforms/SimplifyCFG/signbit-like-value-extension.ll @@ -11,14 +11,10 @@ define i32 @extend_value(i32 %storage, i32 %nbits) { ; CHECK-NEXT: [[SKIPNBITS:%.*]] = sub i32 32, [[NBITS:%.*]] ; CHECK-NEXT: [[VALUE:%.*]] = lshr i32 [[STORAGE:%.*]], [[SKIPNBITS]] ; CHECK-NEXT: [[SHOULDEXTEND:%.*]] = icmp sgt i32 [[STORAGE]], -1 -; CHECK-NEXT: br i1 [[SHOULDEXTEND]], label [[EXTEND:%.*]], label [[END:%.*]] -; CHECK: extend: ; CHECK-NEXT: [[HIGHBITMASK:%.*]] = shl nsw i32 -1, [[NBITS]] ; CHECK-NEXT: [[HIGHBITMASKPLUSONE:%.*]] = add nsw i32 [[HIGHBITMASK]], 1 ; CHECK-NEXT: [[EXTENDED:%.*]] = add i32 [[HIGHBITMASKPLUSONE]], [[VALUE]] -; CHECK-NEXT: br label [[END]] -; CHECK: end: -; CHECK-NEXT: [[RES:%.*]] = phi i32 [ [[EXTENDED]], [[EXTEND]] ], [ [[VALUE]], [[BB:%.*]] ] +; CHECK-NEXT: [[RES:%.*]] = select i1 [[SHOULDEXTEND]], i32 [[EXTENDED]], i32 [[VALUE]] ; CHECK-NEXT: ret i32 [[RES]] ; bb: diff --git a/test/Transforms/SimplifyCFG/speculate-math.ll b/test/Transforms/SimplifyCFG/speculate-math.ll index 9edffafa236..e3fe5ed2fda 100644 --- a/test/Transforms/SimplifyCFG/speculate-math.ll +++ b/test/Transforms/SimplifyCFG/speculate-math.ll @@ -12,23 +12,12 @@ declare float @llvm.minimum.f32(float, float) nounwind readonly declare float @llvm.maximum.f32(float, float) nounwind readonly define double @fdiv_test(double %a, double %b) { -; EXPENSIVE-LABEL: @fdiv_test( -; EXPENSIVE-NEXT: entry: -; EXPENSIVE-NEXT: [[CMP:%.*]] = fcmp ogt double [[A:%.*]], 0.000000e+00 -; EXPENSIVE-NEXT: [[DIV:%.*]] = fdiv double [[B:%.*]], [[A]] -; EXPENSIVE-NEXT: [[COND:%.*]] = select i1 [[CMP]], double [[DIV]], double 0.000000e+00 -; EXPENSIVE-NEXT: ret double [[COND]] -; -; CHEAP-LABEL: @fdiv_test( -; CHEAP-NEXT: entry: -; CHEAP-NEXT: [[CMP:%.*]] = fcmp ogt 
double [[A:%.*]], 0.000000e+00 -; CHEAP-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_END:%.*]] -; CHEAP: cond.true: -; CHEAP-NEXT: [[DIV:%.*]] = fdiv double [[B:%.*]], [[A]] -; CHEAP-NEXT: br label [[COND_END]] -; CHEAP: cond.end: -; CHEAP-NEXT: [[COND:%.*]] = phi double [ [[DIV]], [[COND_TRUE]] ], [ 0.000000e+00, [[ENTRY:%.*]] ] -; CHEAP-NEXT: ret double [[COND]] +; ALL-LABEL: @fdiv_test( +; ALL-NEXT: entry: +; ALL-NEXT: [[CMP:%.*]] = fcmp ogt double [[A:%.*]], 0.000000e+00 +; ALL-NEXT: [[DIV:%.*]] = fdiv double [[B:%.*]], [[A]] +; ALL-NEXT: [[COND:%.*]] = select i1 [[CMP]], double [[DIV]], double 0.000000e+00 +; ALL-NEXT: ret double [[COND]] ; entry: %cmp = fcmp ogt double %a, 0.0 -- 2.50.1