Enable 48-bytes of implicit arguments for HIP as well. Earlier it was enabled for OpenCL. This code is specific to AMDGPU target.
Differential Revision: https://reviews.llvm.org/D62244
git-svn-id: https://llvm.org/svn/llvm-project/cfe/trunk@363414
91177308-0d34-0410-b5e6-
96231b3b80d8
const auto *ReqdWGS = M.getLangOpts().OpenCL ?
FD->getAttr<ReqdWorkGroupSizeAttr>() : nullptr;
- if (M.getLangOpts().OpenCL && FD->hasAttr<OpenCLKernelAttr>() &&
+ if (((M.getLangOpts().OpenCL && FD->hasAttr<OpenCLKernelAttr>()) ||
+ (M.getLangOpts().HIP && FD->hasAttr<CUDAGlobalAttr>())) &&
(M.getTriple().getOS() == llvm::Triple::AMDHSA))
F->addFnAttr("amdgpu-implicitarg-num-bytes", "48");
--- /dev/null
+// REQUIRES: amdgpu-registered-target
+// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -fcuda-is-device -emit-llvm -x hip -o - %s | FileCheck %s
+#include "Inputs/cuda.h"
+
+__global__ void hip_kernel_temp() {
+}
+
+// CHECK: attributes {{.*}} = {{.*}} "amdgpu-implicitarg-num-bytes"="48"