Summary:
- As LCSSA is turned on just before isel, it may create PHIs of the flow,
which are consumed by pseudo structurized CFG instructions. When those
PHIs are eliminated at O0, the COPYs may be placed wrongly, as these
pseudo structurized CFG instructions are considered part of the MBB prologue.
- Run an extra `unreachable-mbb-elimination` pass at the end of isel to clean
up these PHIs.
Reviewers: arsenm, rampitec
Subscribers: kzhuravl, jvesely, wdng, nhaehnle, yaxunl, dstuttard, tpr, t-tye, hiraditya, llvm-commits
Tags: #llvm
Differential Revision: https://reviews.llvm.org/D64353
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@367023 91177308-0d34-0410-b5e6-96231b3b80d8
addPass(createSILowerI1CopiesPass());
addPass(createSIFixupVectorISelPass());
addPass(createSIAddIMGInitPass());
+ // FIXME: Remove this once the phi on CF_END is cleaned up by either removing
+ // LCSSA or other ways.
+ addPass(&UnreachableMachineBlockElimID);
return false;
}
--- /dev/null
+; RUN: llc -march=amdgcn -O0 -o - %s | FileCheck %s
+
+; CHECK-LABEL: non_uniform_loop
+; CHECK: s_endpgm
+define amdgpu_kernel void @non_uniform_loop(float addrspace(1)* %array) { ; loop bounded by the workitem id; %array is unused
+entry:
+ %w = tail call i32 @llvm.amdgcn.workitem.id.x() ; workitem id x, used as the loop bound
+ br label %for.cond
+
+for.cond:
+ %i = phi i32 [0, %entry], [%i.next, %for.inc] ; counter: 0 from entry, %i.next from the back edge
+ %cmp = icmp ult i32 %i, %w ; unsigned compare: keep looping while %i < %w
+ br i1 %cmp, label %for.body, label %for.end
+
+for.body:
+ br label %for.inc ; empty body; only the control flow matters here
+
+for.inc:
+ %i.next = add i32 %i, 1
+ br label %for.cond ; back edge to the loop header
+
+for.end:
+ ret void
+}
+
+declare i32 @llvm.amdgcn.workitem.id.x()