LANGOPT(SYCLIsDevice , 1, 0, "Generate code for SYCL device")
+LANGOPT(HIPUseNewLaunchAPI, 1, 0, "Use new kernel launching API for HIP")
+
LANGOPT(SizedDeallocation , 1, 0, "sized deallocation")
LANGOPT(AlignedAllocation , 1, 0, "aligned allocation")
LANGOPT(AlignedAllocationUnavailable, 1, 0, "aligned allocation functions are unavailable")
HelpText<"HIP device library">;
def fhip_dump_offload_linker_script : Flag<["-"], "fhip-dump-offload-linker-script">,
Group<f_Group>, Flags<[NoArgumentUnused, HelpHidden]>;
+def fhip_new_launch_api : Flag<["-"], "fhip-new-launch-api">,
+ Flags<[CC1Option]>, HelpText<"Use new kernel launching API for HIP.">;
+def fno_hip_new_launch_api : Flag<["-"], "fno-hip-new-launch-api">;
def libomptarget_nvptx_path_EQ : Joined<["--"], "libomptarget-nvptx-path=">, Group<i_Group>,
HelpText<"Path to libomptarget-nvptx libraries">;
def dD : Flag<["-"], "dD">, Group<d_Group>, Flags<[CC1Option]>,
EmittedKernels.push_back({CGF.CurFn, CGF.CurFuncDecl});
if (CudaFeatureEnabled(CGM.getTarget().getSDKVersion(),
- CudaFeature::CUDA_USES_NEW_LAUNCH))
+ CudaFeature::CUDA_USES_NEW_LAUNCH) ||
+ CGF.getLangOpts().HIPUseNewLaunchAPI)
emitDeviceStubBodyNew(CGF, Args);
else
emitDeviceStubBodyLegacy(CGF, Args);
llvm::BasicBlock *EndBlock = CGF.createBasicBlock("setup.end");
- // Lookup cudaLaunchKernel function.
+ // Lookup cudaLaunchKernel/hipLaunchKernel function.
// cudaError_t cudaLaunchKernel(const void *func, dim3 gridDim, dim3 blockDim,
// void **args, size_t sharedMem,
// cudaStream_t stream);
+ // hipError_t hipLaunchKernel(const void *func, dim3 gridDim, dim3 blockDim,
+ // void **args, size_t sharedMem,
+ // hipStream_t stream);
TranslationUnitDecl *TUDecl = CGM.getContext().getTranslationUnitDecl();
DeclContext *DC = TranslationUnitDecl::castToDeclContext(TUDecl);
+ auto LaunchKernelName = addPrefixToName("LaunchKernel");
IdentifierInfo &cudaLaunchKernelII =
- CGM.getContext().Idents.get("cudaLaunchKernel");
+ CGM.getContext().Idents.get(LaunchKernelName);
FunctionDecl *cudaLaunchKernelFD = nullptr;
for (const auto &Result : DC->lookup(&cudaLaunchKernelII)) {
if (FunctionDecl *FD = dyn_cast<FunctionDecl>(Result))
if (cudaLaunchKernelFD == nullptr) {
CGM.Error(CGF.CurFuncDecl->getLocation(),
- "Can't find declaration for cudaLaunchKernel()");
+ "Can't find declaration for " + LaunchKernelName);
return;
}
// Create temporary dim3 grid_dim, block_dim.
/*ShmemSize=*/ShmemSize.getType(),
/*Stream=*/Stream.getType()},
/*isVarArg=*/false),
- "__cudaPopCallConfiguration");
+ addUnderscoredPrefixToName("PopCallConfiguration"));
CGF.EmitRuntimeCallOrInvoke(cudaPopConfigFn,
{GridDim.getPointer(), BlockDim.getPointer(),
const CGFunctionInfo &FI =
CGM.getTypes().arrangeFunctionDeclaration(cudaLaunchKernelFD);
llvm::FunctionCallee cudaLaunchKernelFn =
- CGM.CreateRuntimeFunction(FTy, "cudaLaunchKernel");
+ CGM.CreateRuntimeFunction(FTy, LaunchKernelName);
CGF.EmitCall(FI, CGCallee::forDirect(cudaLaunchKernelFn), ReturnValueSlot(),
LaunchKernelArgs);
CGF.EmitBranch(EndBlock);
// Forward -cl options to -cc1
RenderOpenCLOptions(Args, CmdArgs);
+ if (Args.hasFlag(options::OPT_fhip_new_launch_api,
+ options::OPT_fno_hip_new_launch_api, false))
+ CmdArgs.push_back("-fhip-new-launch-api");
+
if (Arg *A = Args.getLastArg(options::OPT_fcf_protection_EQ)) {
CmdArgs.push_back(
Args.MakeArgString(Twine("-fcf-protection=") + A->getValue()));
Opts.CUDADeviceApproxTranscendentals = 1;
Opts.GPURelocatableDeviceCode = Args.hasArg(OPT_fgpu_rdc);
+ Opts.HIPUseNewLaunchAPI = Args.hasArg(OPT_fhip_new_launch_api);
if (Opts.ObjC) {
if (Arg *arg = Args.getLastArg(OPT_fobjc_runtime_EQ)) {
std::string Sema::getCudaConfigureFuncName() const {
if (getLangOpts().HIP)
- return "hipConfigureCall";
+ return getLangOpts().HIPUseNewLaunchAPI ? "__hipPushCallConfiguration"
+ : "hipConfigureCall";
// New CUDA kernel launch sequence.
if (CudaFeatureEnabled(Context.getTargetInfo().getSDKVersion(),
__host__ __device__ dim3(unsigned x, unsigned y = 1, unsigned z = 1) : x(x), y(y), z(z) {}
};
-typedef struct cudaStream *cudaStream_t;
-typedef enum cudaError {} cudaError_t;
#ifdef __HIP__
+typedef struct hipStream *hipStream_t;
+typedef enum hipError {} hipError_t;
int hipConfigureCall(dim3 gridSize, dim3 blockSize, size_t sharedSize = 0,
- cudaStream_t stream = 0);
+ hipStream_t stream = 0);
+extern "C" hipError_t __hipPushCallConfiguration(dim3 gridSize, dim3 blockSize,
+ size_t sharedSize = 0,
+ hipStream_t stream = 0);
+extern "C" hipError_t hipLaunchKernel(const void *func, dim3 gridDim,
+ dim3 blockDim, void **args,
+ size_t sharedMem,
+ hipStream_t stream);
#else
+typedef struct cudaStream *cudaStream_t;
+typedef enum cudaError {} cudaError_t;
extern "C" int cudaConfigureCall(dim3 gridSize, dim3 blockSize,
size_t sharedSize = 0,
cudaStream_t stream = 0);
// RUN: %clang_cc1 -target-sdk-version=9.2 -emit-llvm %s -o - \
// RUN: | FileCheck %s --check-prefixes=CUDA-NEW,CHECK
// RUN: %clang_cc1 -x hip -emit-llvm %s -o - \
-// RUN: | FileCheck %s --check-prefixes=HIP,CHECK
-
+// RUN: | FileCheck %s --check-prefixes=HIP-OLD,CHECK
+// RUN: %clang_cc1 -fhip-new-launch-api -x hip -emit-llvm %s -o - \
+// RUN: | FileCheck %s --check-prefixes=HIP-NEW,CHECK
#include "Inputs/cuda.h"
// CHECK-LABEL: define{{.*}}g1
-// HIP: call{{.*}}hipSetupArgument
-// HIP: call{{.*}}hipLaunchByPtr
+// HIP-OLD: call{{.*}}hipSetupArgument
+// HIP-OLD: call{{.*}}hipLaunchByPtr
+// HIP-NEW: call{{.*}}__hipPopCallConfiguration
+// HIP-NEW: call{{.*}}hipLaunchKernel
// CUDA-OLD: call{{.*}}cudaSetupArgument
// CUDA-OLD: call{{.*}}cudaLaunch
// CUDA-NEW: call{{.*}}__cudaPopCallConfiguration
// CHECK-LABEL: define{{.*}}main
int main(void) {
- // HIP: call{{.*}}hipConfigureCall
+ // HIP-OLD: call{{.*}}hipConfigureCall
+ // HIP-NEW: call{{.*}}__hipPushCallConfiguration
// CUDA-OLD: call{{.*}}cudaConfigureCall
// CUDA-NEW: call{{.*}}__cudaPushCallConfiguration
// CHECK: icmp