From: Yaxun Liu Date: Tue, 3 Sep 2019 18:50:24 +0000 (+0000) Subject: [AMDGPU] Set default flat work group size to (1,256) for HIP X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=9678410799615bdbf800fbb1d6b8c036061d9401;p=clang [AMDGPU] Set default flat work group size to (1,256) for HIP Differential Revision: https://reviews.llvm.org/D67048 git-svn-id: https://llvm.org/svn/llvm-project/cfe/trunk@370808 91177308-0d34-0410-b5e6-96231b3b80d8 --- diff --git a/lib/CodeGen/TargetInfo.cpp b/lib/CodeGen/TargetInfo.cpp index 231a20c68f..760895a493 100644 --- a/lib/CodeGen/TargetInfo.cpp +++ b/lib/CodeGen/TargetInfo.cpp @@ -7915,8 +7915,9 @@ void AMDGPUTargetCodeGenInfo::setTargetAttributes( const bool IsOpenCLKernel = M.getLangOpts().OpenCL && FD->hasAttr<OpenCLKernelAttr>(); - if ((IsOpenCLKernel || - (M.getLangOpts().HIP && FD->hasAttr<CUDAGlobalAttr>())) && + const bool IsHIPKernel = M.getLangOpts().HIP && + FD->hasAttr<CUDAGlobalAttr>(); + if ((IsOpenCLKernel || IsHIPKernel) && (M.getTriple().getOS() == llvm::Triple::AMDHSA)) F->addFnAttr("amdgpu-implicitarg-num-bytes", "56"); @@ -7942,7 +7943,7 @@ void AMDGPUTargetCodeGenInfo::setTargetAttributes( F->addFnAttr("amdgpu-flat-work-group-size", AttrVal); } else assert(Max == 0 && "Max must be zero"); - } else if (IsOpenCLKernel) { + } else if (IsOpenCLKernel || IsHIPKernel) { // By default, restrict the maximum size to 256. 
F->addFnAttr("amdgpu-flat-work-group-size", "1,256"); } diff --git a/test/CodeGenCUDA/kernel-amdgcn.cu b/test/CodeGenCUDA/kernel-amdgcn.cu index ffa6c9549f..135d303048 100644 --- a/test/CodeGenCUDA/kernel-amdgcn.cu +++ b/test/CodeGenCUDA/kernel-amdgcn.cu @@ -1,4 +1,4 @@ -// RUN: %clang_cc1 -triple amdgcn -fcuda-is-device -emit-llvm %s -o - | FileCheck %s +// RUN: %clang_cc1 -triple amdgcn -fcuda-is-device -emit-llvm -x hip %s -o - | FileCheck %s #include "Inputs/cuda.h" // CHECK: define amdgpu_kernel void @_ZN1A6kernelEv @@ -25,7 +25,7 @@ struct Dummy { EmptyKernelPtr Empty() { return EmptyKernel; } }; -// CHECK: define amdgpu_kernel void @_Z15template_kernelI1AEvT_ +// CHECK: define amdgpu_kernel void @_Z15template_kernelI1AEvT_{{.*}} #[[ATTR:[0-9][0-9]*]] template<class T> __global__ void template_kernel(T x) {} @@ -39,3 +39,4 @@ int main() { launch((void*)D.Empty()); return 0; } +// CHECK: attributes #[[ATTR]] = {{.*}}"amdgpu-flat-work-group-size"="1,256"