def no_cuda_noopt_device_debug : Flag<["--"], "no-cuda-noopt-device-debug">;
def cuda_path_EQ : Joined<["--"], "cuda-path=">, Group<i_Group>,
HelpText<"CUDA installation path">;
+def fopenmp_ptx_EQ : Joined<["--"], "fopenmp-ptx=">, Flags<[DriverOption]>,
+ HelpText<"Pass a PTX version +ptxXX, default +ptx42 (for PTX version 4.2) used by OpenMP device offloading.">;
def ptxas_path_EQ : Joined<["--"], "ptxas-path=">, Group<i_Group>,
HelpText<"Path to ptxas (used for compiling CUDA code)">;
def fcuda_flush_denormals_to_zero : Flag<["-"], "fcuda-flush-denormals-to-zero">,
// than LLVM defaults to. Use PTX4.2 which is the PTX version that
// came with CUDA-7.0.
CC1Args.push_back("-target-feature");
- CC1Args.push_back("+ptx42");
+
+ if (DeviceOffloadingKind == Action::OFK_OpenMP)
+ CC1Args.push_back(
+ DriverArgs.getLastArgValue(options::OPT_fopenmp_ptx_EQ,
+ "+ptx42").data());
+ else
+ CC1Args.push_back("+ptx42");
}
void CudaToolChain::AddCudaIncludeArgs(const ArgList &DriverArgs,
/// ###########################################################################
/// PTXAS is passed -c flag by default when offloading to an NVIDIA device using OpenMP - disable it.
-// RUN: %clang -### -fopenmp=libomp -fopenmp-targets=nvptx64-nvidia-cuda -fnoopenmp-relocatable-target -save-temps -no-canonical-prefixes %s 2>&1 \
+// RUN: %clang -### -fopenmp=libomp -fopenmp-targets=nvptx64-nvidia-cuda -fnoopenmp-relocatable-target -no-canonical-prefixes %s 2>&1 \
// RUN: | FileCheck -check-prefix=CHK-PTXAS-NORELO %s
// CHK-PTXAS-NORELO-NOT: ptxas{{.*}}" "-c"
/// PTXAS is passed -c flag by default when offloading to an NVIDIA device using OpenMP
/// Check that the flag is passed when -fopenmp-relocatable-target is used.
-// RUN: %clang -### -fopenmp=libomp -fopenmp-targets=nvptx64-nvidia-cuda -fopenmp-relocatable-target -save-temps -no-canonical-prefixes %s 2>&1 \
+// RUN: %clang -### -fopenmp=libomp -fopenmp-targets=nvptx64-nvidia-cuda -fopenmp-relocatable-target -no-canonical-prefixes %s 2>&1 \
// RUN: | FileCheck -check-prefix=CHK-PTXAS-RELO %s
// CHK-PTXAS-RELO: ptxas{{.*}}" "-c"
+
+/// ###########################################################################
+
+/// Check PTXAS is passed the compute capability passed to the driver.
+// RUN: %clang -### -fopenmp=libomp -fopenmp-targets=nvptx64-nvidia-cuda --fopenmp-ptx=+ptx52 -save-temps -no-canonical-prefixes %s 2>&1 \
+// RUN: | FileCheck -check-prefix=CHK-PTXAS-VERSION %s
+
+// CHK-PTXAS-VERSION: clang{{.*}}.bc" {{.*}}"-target-feature" "+ptx52"
+// CHK-PTXAS-VERSION-NEXT: clang{{.*}}.bc" {{.*}}"-target-feature" "+ptx52"
+// CHK-PTXAS-VERSION-NEXT: clang{{.*}}.bc" {{.*}}"-target-feature" "+ptx52"