From: Alexey Bataev
Date: Thu, 30 Aug 2018 14:45:24 +0000 (+0000)
Subject: [OPENMP][NVPTX] Add options -f[no-]openmp-cuda-force-full-runtime.
X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=38d46ffb869526bb24e8289a5ec5ceb4214a7deb;p=clang

[OPENMP][NVPTX] Add options -f[no-]openmp-cuda-force-full-runtime.

Added options -f[no-]openmp-cuda-force-full-runtime to [not] force the use of
the full runtime for OpenMP offloading to CUDA devices.

git-svn-id: https://llvm.org/svn/llvm-project/cfe/trunk@341073 91177308-0d34-0410-b5e6-96231b3b80d8
---

diff --git a/include/clang/Basic/LangOptions.def b/include/clang/Basic/LangOptions.def
index 8c2865b3e4..285fcc1458 100644
--- a/include/clang/Basic/LangOptions.def
+++ b/include/clang/Basic/LangOptions.def
@@ -203,6 +203,7 @@ LANGOPT(OpenMPSimd , 1, 0, "Use SIMD only OpenMP support.")
 LANGOPT(OpenMPUseTLS , 1, 0, "Use TLS for threadprivates or runtime calls")
 LANGOPT(OpenMPIsDevice , 1, 0, "Generate code only for OpenMP target device")
 LANGOPT(OpenMPCUDAMode , 1, 0, "Generate code for OpenMP pragmas in SIMT/SPMD mode")
+LANGOPT(OpenMPCUDAForceFullRuntime , 1, 0, "Force to use full runtime in all constructs when offloading to CUDA devices")
 LANGOPT(OpenMPHostCXXExceptions , 1, 0, "C++ exceptions handling in the host code.")
 LANGOPT(RenderScript , 1, 0, "RenderScript")
diff --git a/include/clang/Driver/Options.td b/include/clang/Driver/Options.td
index d4162c0225..92b9784fd0 100644
--- a/include/clang/Driver/Options.td
+++ b/include/clang/Driver/Options.td
@@ -1531,6 +1531,10 @@ def fopenmp_cuda_mode : Flag<["-"], "fopenmp-cuda-mode">, Group<f_Group>,
   Flags<[CC1Option, NoArgumentUnused, HelpHidden]>;
 def fno_openmp_cuda_mode : Flag<["-"], "fno-openmp-cuda-mode">, Group<f_Group>,
   Flags<[NoArgumentUnused, HelpHidden]>;
+def fopenmp_cuda_force_full_runtime : Flag<["-"], "fopenmp-cuda-force-full-runtime">, Group<f_Group>,
+  Flags<[CC1Option, NoArgumentUnused, HelpHidden]>;
+def fno_openmp_cuda_force_full_runtime : Flag<["-"], "fno-openmp-cuda-force-full-runtime">, Group<f_Group>,
+  Flags<[NoArgumentUnused, HelpHidden]>;
 def fno_optimize_sibling_calls : Flag<["-"], "fno-optimize-sibling-calls">, Group<f_Group>;
 def foptimize_sibling_calls : Flag<["-"], "foptimize-sibling-calls">, Group<f_Group>;
 def fno_escaping_block_tail_calls : Flag<["-"], "fno-escaping-block-tail-calls">, Group<f_Group>, Flags<[CC1Option]>;
diff --git a/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp b/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp
index 14845b5862..9dce947e17 100644
--- a/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp
+++ b/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp
@@ -1218,7 +1218,8 @@ void CGOpenMPRuntimeNVPTX::emitSPMDEntryHeader(
   EST.ExitBB = CGF.createBasicBlock(".exit");

   // Initialize the OMP state in the runtime; called by all active threads.
-  bool RequiresFullRuntime = !supportsLightweightRuntime(CGF.getContext(), D);
+  bool RequiresFullRuntime = CGM.getLangOpts().OpenMPCUDAForceFullRuntime ||
+                             !supportsLightweightRuntime(CGF.getContext(), D);
   llvm::Value *Args[] = {
       getThreadLimit(CGF, /*IsInSPMDExecutionMode=*/true),
       /*RequiresOMPRuntime=*/
diff --git a/lib/Driver/ToolChains/Clang.cpp b/lib/Driver/ToolChains/Clang.cpp
index 923c950b89..68706588d9 100644
--- a/lib/Driver/ToolChains/Clang.cpp
+++ b/lib/Driver/ToolChains/Clang.cpp
@@ -4039,8 +4039,16 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA,
       // When in OpenMP offloading mode with NVPTX target, forward
       // cuda-mode flag
-      Args.AddLastArg(CmdArgs, options::OPT_fopenmp_cuda_mode,
-                      options::OPT_fno_openmp_cuda_mode);
+      if (Args.hasFlag(options::OPT_fopenmp_cuda_mode,
+                       options::OPT_fno_openmp_cuda_mode, /*Default=*/false))
+        CmdArgs.push_back("-fopenmp-cuda-mode");
+
+      // When in OpenMP offloading mode with NVPTX target, check if full runtime
+      // is required.
+      if (Args.hasFlag(options::OPT_fopenmp_cuda_force_full_runtime,
+                       options::OPT_fno_openmp_cuda_force_full_runtime,
+                       /*Default=*/false))
+        CmdArgs.push_back("-fopenmp-cuda-force-full-runtime");
       break;
     default:
       // By default, if Clang doesn't know how to generate useful OpenMP code
diff --git a/lib/Frontend/CompilerInvocation.cpp b/lib/Frontend/CompilerInvocation.cpp
index 97ac6c9dc9..b25d92bad6 100644
--- a/lib/Frontend/CompilerInvocation.cpp
+++ b/lib/Frontend/CompilerInvocation.cpp
@@ -2677,10 +2677,15 @@ static void ParseLangArgs(LangOptions &Opts, ArgList &Args, InputKind IK,
           << Opts.OMPHostIRFile;
   }

-  // set CUDA mode for OpenMP target NVPTX if specified in options
+  // Set CUDA mode for OpenMP target NVPTX if specified in options
   Opts.OpenMPCUDAMode = Opts.OpenMPIsDevice && T.isNVPTX() &&
                         Args.hasArg(options::OPT_fopenmp_cuda_mode);

+  // Force the use of the full runtime for OpenMP target NVPTX if specified in options
+  Opts.OpenMPCUDAForceFullRuntime =
+      Opts.OpenMPIsDevice && T.isNVPTX() &&
+      Args.hasArg(options::OPT_fopenmp_cuda_force_full_runtime);
+
   // Record whether the __DEPRECATED define was requested.
Opts.Deprecated = Args.hasFlag(OPT_fdeprecated_macro, OPT_fno_deprecated_macro, diff --git a/test/Driver/openmp-offload-gpu.c b/test/Driver/openmp-offload-gpu.c index c8c70d1094..e02d500c8f 100644 --- a/test/Driver/openmp-offload-gpu.c +++ b/test/Driver/openmp-offload-gpu.c @@ -216,3 +216,26 @@ // HAS_DEBUG: nvlink // HAS_DEBUG-SAME: "-g" +// RUN: %clang -### -no-canonical-prefixes -fopenmp=libomp -fopenmp-targets=nvptx64-nvidia-cuda -Xopenmp-target -march=sm_60 %s -fopenmp-cuda-mode 2>&1 \ +// RUN: | FileCheck -check-prefix=CUDA_MODE %s +// RUN: %clang -### -no-canonical-prefixes -fopenmp=libomp -fopenmp-targets=nvptx64-nvidia-cuda -Xopenmp-target -march=sm_60 %s -fno-openmp-cuda-mode -fopenmp-cuda-mode 2>&1 \ +// RUN: | FileCheck -check-prefix=CUDA_MODE %s +// CUDA_MODE: clang{{.*}}"-cc1"{{.*}}"-triple" "nvptx64-nvidia-cuda" +// CUDA_MODE-SAME: "-fopenmp-cuda-mode" +// RUN: %clang -### -no-canonical-prefixes -fopenmp=libomp -fopenmp-targets=nvptx64-nvidia-cuda -Xopenmp-target -march=sm_60 %s -fno-openmp-cuda-mode 2>&1 \ +// RUN: | FileCheck -check-prefix=NO_CUDA_MODE %s +// RUN: %clang -### -no-canonical-prefixes -fopenmp=libomp -fopenmp-targets=nvptx64-nvidia-cuda -Xopenmp-target -march=sm_60 %s -fopenmp-cuda-mode -fno-openmp-cuda-mode 2>&1 \ +// RUN: | FileCheck -check-prefix=NO_CUDA_MODE %s +// NO_CUDA_MODE-NOT: "-{{fno-|f}}openmp-cuda-mode" + +// RUN: %clang -### -no-canonical-prefixes -fopenmp=libomp -fopenmp-targets=nvptx64-nvidia-cuda -Xopenmp-target -march=sm_60 %s -fopenmp-cuda-force-full-runtime 2>&1 \ +// RUN: | FileCheck -check-prefix=FULL_RUNTIME %s +// RUN: %clang -### -no-canonical-prefixes -fopenmp=libomp -fopenmp-targets=nvptx64-nvidia-cuda -Xopenmp-target -march=sm_60 %s -fno-openmp-cuda-force-full-runtime -fopenmp-cuda-force-full-runtime 2>&1 \ +// RUN: | FileCheck -check-prefix=FULL_RUNTIME %s +// FULL_RUNTIME: clang{{.*}}"-cc1"{{.*}}"-triple" "nvptx64-nvidia-cuda" +// FULL_RUNTIME-SAME: "-fopenmp-cuda-force-full-runtime" +// RUN: %clang -### -no-canonical-prefixes -fopenmp=libomp -fopenmp-targets=nvptx64-nvidia-cuda -Xopenmp-target -march=sm_60 %s -fno-openmp-cuda-force-full-runtime 2>&1 \ +// RUN: | FileCheck -check-prefix=NO_FULL_RUNTIME %s +// RUN: %clang -### -no-canonical-prefixes -fopenmp=libomp -fopenmp-targets=nvptx64-nvidia-cuda -Xopenmp-target -march=sm_60 %s -fopenmp-cuda-force-full-runtime -fno-openmp-cuda-force-full-runtime 2>&1 \ +// RUN: | FileCheck -check-prefix=NO_FULL_RUNTIME %s +// NO_FULL_RUNTIME-NOT: "-{{fno-|f}}openmp-cuda-force-full-runtime" diff --git a/test/OpenMP/nvptx_force_full_runtime_SPMD_codegen.cpp b/test/OpenMP/nvptx_force_full_runtime_SPMD_codegen.cpp new file mode 100644 index 0000000000..288410afd8 --- /dev/null +++ b/test/OpenMP/nvptx_force_full_runtime_SPMD_codegen.cpp @@ -0,0 +1,328 @@ +// Test target codegen - host bc file has to be created first. 
+// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm-bc %s -o %t-ppc-host.bc +// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple nvptx64-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - -fopenmp-cuda-force-full-runtime | FileCheck %s +// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple i386-unknown-unknown -fopenmp-targets=nvptx-nvidia-cuda -emit-llvm-bc %s -o %t-x86-host.bc +// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple nvptx-unknown-unknown -fopenmp-targets=nvptx-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -o - -fopenmp-cuda-force-full-runtime | FileCheck %s +// RUN: %clang_cc1 -verify -fopenmp -fexceptions -fcxx-exceptions -x c++ -triple nvptx-unknown-unknown -fopenmp-targets=nvptx-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -o - -fopenmp-cuda-force-full-runtime | FileCheck %s +// expected-no-diagnostics +#ifndef HEADER +#define HEADER + +// CHECK-NOT: @__omp_offloading_{{.+}}_exec_mode = weak constant i8 1 +// CHECK: @__omp_offloading_{{.+}}_l52_exec_mode = weak constant i8 1 +// CHECK-NOT: @__omp_offloading_{{.+}}_exec_mode = weak constant i8 1 + +void foo() { +// CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 1, i16 {{.+}}) +// CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 1, i16 {{.+}}) +// CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 1, i16 {{.+}}) +// CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 1, i16 {{.+}}) +// CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 1, i16 {{.+}}) +// CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 1, i16 {{.+}}) +// CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 1, i16 {{.+}}) +#pragma omp target teams distribute parallel for simd + for (int i = 0; i < 10; ++i) + ; +#pragma omp target teams distribute parallel for simd schedule(static) + for (int i = 0; i < 10; ++i) + ; +#pragma omp target teams distribute parallel for simd schedule(static, 1) + for (int i = 0; i < 10; ++i) + ; +#pragma omp target teams distribute parallel for simd schedule(auto) + for (int i = 0; i < 10; ++i) + ; +#pragma omp target teams distribute parallel for simd schedule(runtime) + for (int i = 0; i < 10; ++i) + ; +#pragma omp target teams distribute parallel for simd schedule(dynamic) + for (int i = 0; i < 10; ++i) + ; +#pragma omp target teams distribute parallel for simd schedule(guided) + for (int i = 0; i < 10; ++i) + ; +int a; +// CHECK: call void @__kmpc_kernel_init( +// CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 1, i16 {{.+}}) +// CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 1, i16 {{.+}}) +// CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 1, i16 {{.+}}) +// CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 1, i16 {{.+}}) +// CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 1, i16 {{.+}}) +// CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 1, i16 {{.+}}) +#pragma omp target teams distribute parallel for lastprivate(a) + for (int i = 0; i < 10; ++i) + a = i; +#pragma omp target teams distribute parallel for schedule(static) + for (int i = 0; i < 10; ++i) + ; +#pragma omp target teams distribute parallel for schedule(static, 1) + for (int i = 0; i < 10; ++i) + ; +#pragma omp target teams distribute parallel for schedule(auto) + for (int i = 0; i < 10; ++i) + ; +#pragma omp target teams 
distribute parallel for schedule(runtime) + for (int i = 0; i < 10; ++i) + ; +#pragma omp target teams distribute parallel for schedule(dynamic) + for (int i = 0; i < 10; ++i) + ; +#pragma omp target teams distribute parallel for schedule(guided) + for (int i = 0; i < 10; ++i) + ; +// CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 1, i16 {{.+}}) +// CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 1, i16 {{.+}}) +// CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 1, i16 {{.+}}) +// CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 1, i16 {{.+}}) +// CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 1, i16 {{.+}}) +// CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 1, i16 {{.+}}) +// CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 1, i16 {{.+}}) +#pragma omp target teams +#pragma omp distribute parallel for simd + for (int i = 0; i < 10; ++i) + ; +#pragma omp target teams +#pragma omp distribute parallel for simd schedule(static) + for (int i = 0; i < 10; ++i) + ; +#pragma omp target teams +#pragma omp distribute parallel for simd schedule(static, 1) + for (int i = 0; i < 10; ++i) + ; +#pragma omp target teams +#pragma omp distribute parallel for simd schedule(auto) + for (int i = 0; i < 10; ++i) + ; +#pragma omp target teams +#pragma omp distribute parallel for simd schedule(runtime) + for (int i = 0; i < 10; ++i) + ; +#pragma omp target teams +#pragma omp distribute parallel for simd schedule(dynamic) + for (int i = 0; i < 10; ++i) + ; +#pragma omp target teams +#pragma omp distribute parallel for simd schedule(guided) + for (int i = 0; i < 10; ++i) + ; +// CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 1, i16 {{.+}}) +// CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 1, i16 {{.+}}) +// CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 1, i16 {{.+}}) +// CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 1, i16 {{.+}}) +// CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 1, i16 {{.+}}) +// CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 1, i16 {{.+}}) +// CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 1, i16 {{.+}}) +#pragma omp target teams +#pragma omp distribute parallel for + for (int i = 0; i < 10; ++i) + ; +#pragma omp target teams +#pragma omp distribute parallel for schedule(static) + for (int i = 0; i < 10; ++i) + ; +#pragma omp target teams +#pragma omp distribute parallel for schedule(static, 1) + for (int i = 0; i < 10; ++i) + ; +#pragma omp target teams +#pragma omp distribute parallel for schedule(auto) + for (int i = 0; i < 10; ++i) + ; +#pragma omp target teams +#pragma omp distribute parallel for schedule(runtime) + for (int i = 0; i < 10; ++i) + ; +#pragma omp target teams +#pragma omp distribute parallel for schedule(dynamic) + for (int i = 0; i < 10; ++i) + ; +#pragma omp target teams +#pragma omp distribute parallel for schedule(guided) + for (int i = 0; i < 10; ++i) + ; +// CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 1, i16 {{.+}}) +// CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 1, i16 {{.+}}) +// CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 1, i16 {{.+}}) +// CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 1, i16 {{.+}}) +// CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 1, i16 {{.+}}) +// CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 1, i16 {{.+}}) +// CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 1, i16 {{.+}}) +#pragma omp target +#pragma omp teams 
+#pragma omp distribute parallel for + for (int i = 0; i < 10; ++i) + ; +#pragma omp target +#pragma omp teams +#pragma omp distribute parallel for schedule(static) + for (int i = 0; i < 10; ++i) + ; +#pragma omp target +#pragma omp teams +#pragma omp distribute parallel for schedule(static, 1) + for (int i = 0; i < 10; ++i) + ; +#pragma omp target +#pragma omp teams +#pragma omp distribute parallel for schedule(auto) + for (int i = 0; i < 10; ++i) + ; +#pragma omp target +#pragma omp teams +#pragma omp distribute parallel for schedule(runtime) + for (int i = 0; i < 10; ++i) + ; +#pragma omp target +#pragma omp teams +#pragma omp distribute parallel for schedule(dynamic) + for (int i = 0; i < 10; ++i) + ; +#pragma omp target +#pragma omp teams +#pragma omp distribute parallel for schedule(guided) + for (int i = 0; i < 10; ++i) + ; +// CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 1, i16 {{.+}}) +// CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 1, i16 {{.+}}) +// CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 1, i16 {{.+}}) +// CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 1, i16 {{.+}}) +// CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 1, i16 {{.+}}) +// CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 1, i16 {{.+}}) +// CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 1, i16 {{.+}}) +#pragma omp target parallel for + for (int i = 0; i < 10; ++i) + ; +#pragma omp target parallel for schedule(static) + for (int i = 0; i < 10; ++i) + ; +#pragma omp target parallel for schedule(static, 1) + for (int i = 0; i < 10; ++i) + ; +#pragma omp target parallel for schedule(auto) + for (int i = 0; i < 10; ++i) + ; +#pragma omp target parallel for schedule(runtime) + for (int i = 0; i < 10; ++i) + ; +#pragma omp target parallel for schedule(dynamic) + for (int i = 0; i < 10; ++i) + ; +#pragma omp target parallel for schedule(guided) + for (int i = 0; i < 10; ++i) + ; +// CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 1, i16 {{.+}}) +// CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 1, i16 {{.+}}) +// CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 1, i16 {{.+}}) +// CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 1, i16 {{.+}}) +// CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 1, i16 {{.+}}) +// CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 1, i16 {{.+}}) +// CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 1, i16 {{.+}}) +#pragma omp target parallel +#pragma omp for simd + for (int i = 0; i < 10; ++i) + ; +#pragma omp target parallel +#pragma omp for simd schedule(static) + for (int i = 0; i < 10; ++i) + ; +#pragma omp target parallel +#pragma omp for simd schedule(static, 1) + for (int i = 0; i < 10; ++i) + ; +#pragma omp target parallel +#pragma omp for simd schedule(auto) + for (int i = 0; i < 10; ++i) + ; +#pragma omp target parallel +#pragma omp for simd schedule(runtime) + for (int i = 0; i < 10; ++i) + ; +#pragma omp target parallel +#pragma omp for simd schedule(dynamic) + for (int i = 0; i < 10; ++i) + ; +#pragma omp target parallel +#pragma omp for simd schedule(guided) + for (int i = 0; i < 10; ++i) + ; +// CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 1, i16 {{.+}}) +// CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 1, i16 {{.+}}) +// CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 1, i16 {{.+}}) +// CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 1, i16 {{.+}}) +// CHECK: call void 
@__kmpc_spmd_kernel_init(i32 {{.+}}, i16 1, i16 {{.+}}) +// CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 1, i16 {{.+}}) +// CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 1, i16 {{.+}}) +#pragma omp target +#pragma omp parallel +#pragma omp for simd ordered + for (int i = 0; i < 10; ++i) + ; +#pragma omp target +#pragma omp parallel +#pragma omp for simd schedule(static) + for (int i = 0; i < 10; ++i) + ; +#pragma omp target +#pragma omp parallel +#pragma omp for simd schedule(static, 1) + for (int i = 0; i < 10; ++i) + ; +#pragma omp target +#pragma omp parallel +#pragma omp for simd schedule(auto) + for (int i = 0; i < 10; ++i) + ; +#pragma omp target +#pragma omp parallel +#pragma omp for simd schedule(runtime) + for (int i = 0; i < 10; ++i) + ; +#pragma omp target +#pragma omp parallel +#pragma omp for simd schedule(dynamic) + for (int i = 0; i < 10; ++i) + ; +#pragma omp target +#pragma omp parallel +#pragma omp for simd schedule(guided) + for (int i = 0; i < 10; ++i) + ; +// CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 1, i16 {{.+}}) +// CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 1, i16 {{.+}}) +// CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 1, i16 {{.+}}) +// CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 1, i16 {{.+}}) +// CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 1, i16 {{.+}}) +// CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 1, i16 {{.+}}) +// CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 1, i16 {{.+}}) +#pragma omp target +#pragma omp parallel for + for (int i = 0; i < 10; ++i) + ; +#pragma omp target +#pragma omp parallel for schedule(static) + for (int i = 0; i < 10; ++i) + ; +#pragma omp target +#pragma omp parallel for schedule(static, 1) + for (int i = 0; i < 10; ++i) + ; +#pragma omp target +#pragma omp parallel for schedule(auto) + for (int i = 0; i < 10; ++i) + ; +#pragma omp target +#pragma omp parallel for schedule(runtime) + for (int i = 0; i < 10; ++i) + ; +#pragma omp target +#pragma omp parallel for schedule(dynamic) + for (int i = 0; i < 10; ++i) + ; +#pragma omp target +#pragma omp parallel for schedule(guided) + for (int i = 0; i < 10; ++i) + ; +} + +#endif +
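
A minimal usage sketch of the new flag, assuming an illustrative source file and
command lines (the file name and invocations below are examples, not taken from
the commit): constructs like the one below are normally eligible for the
lightweight runtime, and passing -fopenmp-cuda-force-full-runtime makes the
device codegen pass 1 for the RequiresOMPRuntime argument of
__kmpc_spmd_kernel_init, which is what the CHECK lines in the test expect.

// force_full_runtime_example.cpp -- illustrative only.
//
// Default device compilation (lightweight runtime allowed for this SPMD loop):
//   clang -fopenmp -fopenmp-targets=nvptx64-nvidia-cuda force_full_runtime_example.cpp
//
// Forcing the full runtime on the device:
//   clang -fopenmp -fopenmp-targets=nvptx64-nvidia-cuda \
//         -fopenmp-cuda-force-full-runtime force_full_runtime_example.cpp
void vec_add(int *a, int *b, int *c, int n) {
  // Offloaded SPMD construct; with the new flag the generated kernel
  // initializes the full OpenMP device runtime instead of the lightweight one.
#pragma omp target teams distribute parallel for map(to: a[:n], b[:n]) map(from: c[:n])
  for (int i = 0; i < n; ++i)
    c[i] = a[i] + b[i];
}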