bool PrepareForLTO;
bool PrepareForThinLTO;
bool PerformThinLTO;
+ /// True if the target has divergent branches (e.g. a GPU); loop unswitching
+ /// will then avoid non-trivially unswitching divergent conditions.
+ bool DivergentTarget;
/// Enable profile instrumentation pass.
bool EnablePGOInstrGen;
//
// LoopUnswitch - This pass is a simple loop unswitching pass.
//
-Pass *createLoopUnswitchPass(bool OptimizeForSize = false);
+Pass *createLoopUnswitchPass(bool OptimizeForSize = false,
+ bool hasBranchDivergence = false);
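For context (not part of the patch): loop unswitching hoists a loop-invariant branch out of a loop and clones the loop for each side of the branch. A rough source-level sketch of the transformation, with purely illustrative function names:

// Illustrative sketch only. Before unswitching, a loop-invariant condition is
// re-tested on every iteration.
void before(bool c, int n, int *out) {
  for (int i = 0; i < n; ++i) {
    if (c)            // loop-invariant condition inside the loop
      out[i] = i;
  }
}

// After unswitching, the condition is hoisted and the loop is cloned, with the
// branch folded away in each clone.
void after(bool c, int n, int *out) {
  if (c) {
    for (int i = 0; i < n; ++i)
      out[i] = i;     // clone with the branch folded to true
  } else {
    for (int i = 0; i < n; ++i) {
      // clone with the branch folded to false: nothing left to do
    }
  }
}

On a target with divergent branches this duplication buys nothing when the condition differs per lane, which is what the rest of the patch addresses.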
//===----------------------------------------------------------------------===//
//
}
void AMDGPUTargetMachine::adjustPassManager(PassManagerBuilder &Builder) {
+ Builder.DivergentTarget = true;
+
bool Internalize = InternalizeSymbols &&
(getOptLevel() > CodeGenOpt::None) &&
(getTargetTriple().getArch() == Triple::amdgcn);
PGOInstrUse = RunPGOInstrUse;
PrepareForThinLTO = EnablePrepareForThinLTO;
PerformThinLTO = false;
+ DivergentTarget = false;
}
PassManagerBuilder::~PassManagerBuilder() {
// Rotate Loop - disable header duplication at -Oz
MPM.add(createLoopRotatePass(SizeLevel == 2 ? 0 : -1));
MPM.add(createLICMPass()); // Hoist loop invariants
- MPM.add(createLoopUnswitchPass(SizeLevel || OptLevel < 3));
+ MPM.add(createLoopUnswitchPass(SizeLevel || OptLevel < 3, DivergentTarget));
MPM.add(createCFGSimplificationPass());
addInstructionCombiningPass(MPM);
MPM.add(createIndVarSimplifyPass()); // Canonicalize indvars
MPM.add(createCorrelatedValuePropagationPass());
addInstructionCombiningPass(MPM);
MPM.add(createLICMPass());
- MPM.add(createLoopUnswitchPass(SizeLevel || OptLevel < 3));
+ MPM.add(createLoopUnswitchPass(SizeLevel || OptLevel < 3, DivergentTarget));
MPM.add(createCFGSimplificationPass());
addInstructionCombiningPass(MPM);
}
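A minimal usage sketch (not part of the patch) of how a client building its own legacy pipeline would consume the new flag. The buildPipeline wrapper name is hypothetical; the rest is the existing PassManagerBuilder API, and the flag defaults to false unless the target's adjustPassManager() (as AMDGPU does above) or the client sets it.

#include "llvm/IR/LegacyPassManager.h"
#include "llvm/Transforms/IPO/PassManagerBuilder.h"

void buildPipeline(llvm::legacy::PassManager &PM) {
  llvm::PassManagerBuilder PMB;
  PMB.OptLevel = 3;           // -O3-style pipeline
  PMB.DivergentTarget = true; // assume a GPU-like target with divergent branches
  PMB.populateModulePassManager(PM);
}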
#include "llvm/Analysis/GlobalsModRef.h"
#include "llvm/Analysis/AssumptionCache.h"
#include "llvm/Analysis/CodeMetrics.h"
+#include "llvm/Analysis/DivergenceAnalysis.h"
#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/LoopPass.h"
// NewBlocks contained cloned copy of basic blocks from LoopBlocks.
std::vector<BasicBlock*> NewBlocks;
+ bool hasBranchDivergence;
+
public:
static char ID; // Pass ID, replacement for typeid
- explicit LoopUnswitch(bool Os = false) :
+ explicit LoopUnswitch(bool Os = false, bool hasBranchDivergence = false) :
LoopPass(ID), OptimizeForSize(Os), redoLoop(false),
currentLoop(nullptr), DT(nullptr), loopHeader(nullptr),
- loopPreheader(nullptr) {
+ loopPreheader(nullptr), hasBranchDivergence(hasBranchDivergence) {
initializeLoopUnswitchPass(*PassRegistry::getPassRegistry());
}
void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.addRequired<AssumptionCacheTracker>();
AU.addRequired<TargetTransformInfoWrapperPass>();
+ if (hasBranchDivergence)
+ AU.addRequired<DivergenceAnalysis>();
getLoopAnalysisUsage(AU);
}
INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
INITIALIZE_PASS_DEPENDENCY(LoopPass)
INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(DivergenceAnalysis)
INITIALIZE_PASS_END(LoopUnswitch, "loop-unswitch", "Unswitch loops",
false, false)
-Pass *llvm::createLoopUnswitchPass(bool Os) {
- return new LoopUnswitch(Os);
+Pass *llvm::createLoopUnswitchPass(bool Os, bool hasBranchDivergence) {
+ return new LoopUnswitch(Os, hasBranchDivergence);
}
/// Operator chain lattice.
<< ". Cost too high.\n");
return false;
}
+ // Do not unswitch on a divergent condition: on a divergent target the branch
+ // stays divergent after hoisting, so both loop versions can still execute
+ // and unswitching would only duplicate code.
+ if (hasBranchDivergence &&
+ getAnalysis<DivergenceAnalysis>().isDivergent(LoopCond)) {
+ DEBUG(dbgs() << "NOT unswitching loop %"
+ << currentLoop->getHeader()->getName()
+ << " at non-trivial condition '" << *Val
+ << "' == " << *LoopCond << "\n"
+ << ". Condition is divergent.\n");
+ return false;
+ }
UnswitchNontrivialCondition(LoopCond, Val, currentLoop, TI);
return true;
--- /dev/null
+; RUN: opt -mtriple=amdgcn-- -O3 -S %s | FileCheck %s
+
+; Check that loop unswitching happened and the condition was hoisted out of the loop.
+; The condition is uniform, so all targets should perform unswitching.
+
+; CHECK-LABEL: {{^}}define void @uniform_unswitch
+; CHECK: entry:
+; CHECK-NEXT: [[LOOP_COND:%[a-z0-9]+]] = icmp
+; CHECK-NEXT: [[IF_COND:%[a-z0-9]+]] = icmp eq i32 %x, 123456
+; CHECK-NEXT: and i1 [[LOOP_COND]], [[IF_COND]]
+; CHECK-NEXT: br i1
+
+define void @uniform_unswitch(i32 * nocapture %out, i32 %n, i32 %x) {
+entry:
+ %cmp6 = icmp sgt i32 %n, 0
+ br i1 %cmp6, label %for.body.lr.ph, label %for.cond.cleanup
+
+for.body.lr.ph: ; preds = %entry
+ %cmp1 = icmp eq i32 %x, 123456
+ br label %for.body
+
+for.cond.cleanup.loopexit: ; preds = %for.inc
+ br label %for.cond.cleanup
+
+for.cond.cleanup: ; preds = %for.cond.cleanup.loopexit, %entry
+ ret void
+
+for.body: ; preds = %for.inc, %for.body.lr.ph
+ %i.07 = phi i32 [ 0, %for.body.lr.ph ], [ %inc, %for.inc ]
+ br i1 %cmp1, label %if.then, label %for.inc
+
+if.then: ; preds = %for.body
+ %arrayidx = getelementptr inbounds i32, i32 * %out, i32 %i.07
+ store i32 %i.07, i32 * %arrayidx, align 4
+ br label %for.inc
+
+for.inc: ; preds = %for.body, %if.then
+ %inc = add nuw nsw i32 %i.07, 1
+ %exitcond = icmp eq i32 %inc, %n
+ br i1 %exitcond, label %for.cond.cleanup.loopexit, label %for.body
+}
+
+; Check that loop unswitching does not happen if the condition is divergent.
+
+; CHECK-LABEL: {{^}}define void @divergent_unswitch
+; CHECK: entry:
+; CHECK: icmp
+; CHECK: [[IF_COND:%[a-z0-9]+]] = icmp {{.*}} 567890
+; CHECK: br label
+; CHECK: br i1 [[IF_COND]]
+
+define void @divergent_unswitch(i32 * nocapture %out, i32 %n) {
+entry:
+ %cmp9 = icmp sgt i32 %n, 0
+ br i1 %cmp9, label %for.body.lr.ph, label %for.cond.cleanup
+
+for.body.lr.ph: ; preds = %entry
+ %call = tail call i32 @llvm.amdgcn.workitem.id.x() #0
+ %cmp2 = icmp eq i32 %call, 567890
+ br label %for.body
+
+for.cond.cleanup.loopexit: ; preds = %for.inc
+ br label %for.cond.cleanup
+
+for.cond.cleanup: ; preds = %for.cond.cleanup.loopexit, %entry
+ ret void
+
+for.body: ; preds = %for.inc, %for.body.lr.ph
+ %i.010 = phi i32 [ 0, %for.body.lr.ph ], [ %inc, %for.inc ]
+ br i1 %cmp2, label %if.then, label %for.inc
+
+if.then: ; preds = %for.body
+ %arrayidx = getelementptr inbounds i32, i32 * %out, i32 %i.010
+ store i32 %i.010, i32 * %arrayidx, align 4
+ br label %for.inc
+
+for.inc: ; preds = %for.body, %if.then
+ %inc = add nuw nsw i32 %i.010, 1
+ %exitcond = icmp eq i32 %inc, %n
+ br i1 %exitcond, label %for.cond.cleanup.loopexit, label %for.body
+}
+
+declare i32 @llvm.amdgcn.workitem.id.x() #0
+
+attributes #0 = { nounwind readnone }
--- /dev/null
+if 'AMDGPU' not in config.root.targets:
+ config.unsupported = True