This patch corrects the maximum workgroups per CU if we have big
workgroups (more than 128). This calculation contributes to the
occupancy calculation in respect to LDS size.
Differential Revision: https://reviews.llvm.org/D29974
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@295134
91177308-0d34-0410-b5e6-
96231b3b80d8
unsigned FlatWorkGroupSize) {
if (!Features.test(FeatureGCN))
return 8;
- return getWavesPerWorkGroup(Features, FlatWorkGroupSize) == 1 ? 40 : 16;
+ unsigned N = getWavesPerWorkGroup(Features, FlatWorkGroupSize);
+ if (N == 1)
+ return 40;
+ N = 40 / N;
+ return std::min(N, 16u);
}
unsigned getMaxWavesPerCU(const FeatureBitset &Features) {
}
; ALL-LABEL: @occupancy_0(
-; ALL: alloca [5 x i32]
+; CI-NOT: alloca [5 x i32]
+; SI: alloca [5 x i32]
define void @occupancy_0(i32 addrspace(1)* nocapture %out, i32 addrspace(1)* nocapture %in) #3 {
entry:
%stack = alloca [5 x i32], align 4
}
; ALL-LABEL: @occupancy_max(
-; ALL: alloca [5 x i32]
+; CI-NOT: alloca [5 x i32]
+; SI: alloca [5 x i32]
define void @occupancy_max(i32 addrspace(1)* nocapture %out, i32 addrspace(1)* nocapture %in) #4 {
entry:
%stack = alloca [5 x i32], align 4