CUDA_91,
CUDA_92,
CUDA_100,
- LATEST = CUDA_100,
+ CUDA_101,
+ LATEST = CUDA_101,
};
const char *CudaVersionToString(CudaVersion V);
// Input is "Major.Minor"
// Identifies CUDA behaviours whose availability depends on the CUDA version.
// Each enumerator is queried through CudaFeatureEnabled(), which compares the
// given version against the first release that has the feature.
enum class CudaFeature {
// CUDA-9.2+ uses a new API for launching kernels.
CUDA_USES_NEW_LAUNCH,
+ // CUDA-10.1+ needs an explicit __cudaRegisterFatBinaryEnd() call to end GPU binary registration.
+ CUDA_USES_FATBIN_REGISTER_END,
};
bool CudaFeatureEnabled(llvm::VersionTuple, CudaFeature);
return "9.2";
case CudaVersion::CUDA_100:
return "10.0";
+ case CudaVersion::CUDA_101:
+ return "10.1";
}
llvm_unreachable("invalid enum");
}
.Case("9.0", CudaVersion::CUDA_90)
.Case("9.1", CudaVersion::CUDA_91)
.Case("9.2", CudaVersion::CUDA_92)
- .Case("10.0", CudaVersion::CUDA_100);
+ .Case("10.0", CudaVersion::CUDA_100)
+ .Case("10.1", CudaVersion::CUDA_101);
}
const char *CudaArchToString(CudaArch A) {
return CudaVersion::CUDA_92;
case 100:
return CudaVersion::CUDA_100;
+ case 101:
+ return CudaVersion::CUDA_101;
default:
return CudaVersion::UNKNOWN;
}
switch (Feature) {
case CudaFeature::CUDA_USES_NEW_LAUNCH:
return Version >= CudaVersion::CUDA_92;
+ case CudaFeature::CUDA_USES_FATBIN_REGISTER_END:
+ return Version >= CudaVersion::CUDA_101;
}
llvm_unreachable("Unknown CUDA feature.");
}
// Call __cuda_register_globals(GpuBinaryHandle);
if (RegisterGlobalsFunc)
CtorBuilder.CreateCall(RegisterGlobalsFunc, RegisterFatbinCall);
+
+ // Call __cudaRegisterFatBinaryEnd(Handle) if this CUDA version needs it.
+ if (CudaFeatureEnabled(CGM.getTarget().getSDKVersion(),
+ CudaFeature::CUDA_USES_FATBIN_REGISTER_END)) {
+ // void __cudaRegisterFatBinaryEnd(void **);
+ llvm::FunctionCallee RegisterFatbinEndFunc = CGM.CreateRuntimeFunction(
+ llvm::FunctionType::get(VoidTy, VoidPtrPtrTy, false),
+ "__cudaRegisterFatBinaryEnd");
+ CtorBuilder.CreateCall(RegisterFatbinEndFunc, RegisterFatbinCall);
+ }
} else {
// Generate a unique module ID.
SmallString<64> ModuleID;
return CudaVersion::CUDA_92;
if (Major == 10 && Minor == 0)
return CudaVersion::CUDA_100;
+ if (Major == 10 && Minor == 1)
+ return CudaVersion::CUDA_101;
return CudaVersion::UNKNOWN;
}
#include "cuda.h"
// Compile-time sanity check on the CUDA_VERSION macro expected from cuda.h:
// fail if it is undefined, or if its value lies outside the accepted range.
// The upper bound is raised from 10000 to 10010 together with the CUDA 10.1
// additions (presumably 10010 is the CUDA_VERSION value of CUDA 10.1 -- confirm
// against cuda.h).
#if !defined(CUDA_VERSION)
#error "cuda.h did not define CUDA_VERSION"
-#elif CUDA_VERSION < 7000 || CUDA_VERSION > 10000
+#elif CUDA_VERSION < 7000 || CUDA_VERSION > 10010
#error "Unsupported CUDA version!"
#endif