[OPENMP, NVPTX] Do not globalize variables with reference/pointer types.

author Alexey Bataev <a.bataev@hotmail.com>

Tue, 15 May 2018 18:01:01 +0000 (18:01 +0000)

committer Alexey Bataev <a.bataev@hotmail.com>

Tue, 15 May 2018 18:01:01 +0000 (18:01 +0000)
author Alexey Bataev <a.bataev@hotmail.com>
Tue, 15 May 2018 18:01:01 +0000 (18:01 +0000)
committer Alexey Bataev <a.bataev@hotmail.com>
Tue, 15 May 2018 18:01:01 +0000 (18:01 +0000)
diff --git a/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp b/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp

index bad4a8b3778adb19f2c9621200f614df3542e609..1f5bfee41bd90e3aff6999ebe5616c19a55662d5 100644 (file)
--- a/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp
+++ b/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp
@@ -220,7 +220,10 @@ class CheckVarsEscapingDeclContext final
                 "Parameter captured by value with variably modified type");
          EscapedParameters.insert(VD);
        }
-    }
+    } else if (VD->getType()->isAnyPointerType() ||
+               VD->getType()->isReferenceType())
+      // Do not globalize variables with reference or pointer type.
+      return;
      if (VD->getType()->isVariablyModifiedType())
        EscapedVariableLengthDecls.insert(VD);
      else
@@ -602,9 +605,12 @@ static const Stmt *getSingleCompoundChild(const Stmt *Body) {
  }
  
  /// Check if the parallel directive has an 'if' clause with non-constant or
-/// false condition.
-static bool hasParallelIfClause(ASTContext &Ctx,
-                                const OMPExecutableDirective &D) {
+/// false condition. Also, check if the number of threads is strictly specified
+/// and run those directives in non-SPMD mode.
+static bool hasParallelIfNumThreadsClause(ASTContext &Ctx,
+                                          const OMPExecutableDirective &D) {
+  if (D.hasClausesOfKind<OMPNumThreadsClause>())
+    return true;
    for (const auto *C : D.getClausesOfKind<OMPIfClause>()) {
      OpenMPDirectiveKind NameModifier = C->getNameModifier();
      if (NameModifier != OMPD_parallel && NameModifier != OMPD_unknown)
@@ -629,7 +635,7 @@ static bool hasNestedSPMDDirective(ASTContext &Ctx,
      switch (D.getDirectiveKind()) {
      case OMPD_target:
        if (isOpenMPParallelDirective(DKind) &&
-          !hasParallelIfClause(Ctx, *NestedDir))
+          !hasParallelIfNumThreadsClause(Ctx, *NestedDir))
          return true;
        if (DKind == OMPD_teams || DKind == OMPD_teams_distribute) {
          Body = NestedDir->getInnermostCapturedStmt()->IgnoreContainers();
@@ -639,7 +645,7 @@ static bool hasNestedSPMDDirective(ASTContext &Ctx,
          if (const auto *NND = dyn_cast<OMPExecutableDirective>(ChildStmt)) {
            DKind = NND->getDirectiveKind();
            if (isOpenMPParallelDirective(DKind) &&
-              !hasParallelIfClause(Ctx, *NND))
+              !hasParallelIfNumThreadsClause(Ctx, *NND))
              return true;
            if (DKind == OMPD_distribute) {
              Body = NestedDir->getInnermostCapturedStmt()->IgnoreContainers();
@@ -651,7 +657,7 @@ static bool hasNestedSPMDDirective(ASTContext &Ctx,
              if (const auto *NND = dyn_cast<OMPExecutableDirective>(ChildStmt)) {
                DKind = NND->getDirectiveKind();
                return isOpenMPParallelDirective(DKind) &&
-                     !hasParallelIfClause(Ctx, *NND);
+                     !hasParallelIfNumThreadsClause(Ctx, *NND);
              }
            }
          }
@@ -659,7 +665,7 @@ static bool hasNestedSPMDDirective(ASTContext &Ctx,
        return false;
      case OMPD_target_teams:
        if (isOpenMPParallelDirective(DKind) &&
-          !hasParallelIfClause(Ctx, *NestedDir))
+          !hasParallelIfNumThreadsClause(Ctx, *NestedDir))
          return true;
        if (DKind == OMPD_distribute) {
          Body = NestedDir->getInnermostCapturedStmt()->IgnoreContainers();
@@ -669,13 +675,13 @@ static bool hasNestedSPMDDirective(ASTContext &Ctx,
          if (const auto *NND = dyn_cast<OMPExecutableDirective>(ChildStmt)) {
            DKind = NND->getDirectiveKind();
            return isOpenMPParallelDirective(DKind) &&
-                 !hasParallelIfClause(Ctx, *NND);
+                 !hasParallelIfNumThreadsClause(Ctx, *NND);
          }
        }
        return false;
      case OMPD_target_teams_distribute:
        return isOpenMPParallelDirective(DKind) &&
-             !hasParallelIfClause(Ctx, *NestedDir);
+             !hasParallelIfNumThreadsClause(Ctx, *NestedDir);
      case OMPD_target_simd:
      case OMPD_target_parallel:
      case OMPD_target_parallel_for:
@@ -746,7 +752,7 @@ static bool supportsSPMDExecutionMode(ASTContext &Ctx,
    case OMPD_target_parallel_for_simd:
    case OMPD_target_teams_distribute_parallel_for:
    case OMPD_target_teams_distribute_parallel_for_simd:
-    return !hasParallelIfClause(Ctx, D);
+    return !hasParallelIfNumThreadsClause(Ctx, D);
    case OMPD_target_simd:
    case OMPD_target_teams_distribute_simd:
      return false;
@@ -967,7 +973,6 @@ void CGOpenMPRuntimeNVPTX::emitSpmdEntryHeader(
    CGF.EmitBlock(ExecuteBB);
  
    IsInTargetMasterThreadRegion = true;
-  emitGenericVarsProlog(CGF, D.getLocStart());
  }
  
  void CGOpenMPRuntimeNVPTX::emitSpmdEntryFooter(CodeGenFunction &CGF,
@@ -976,8 +981,6 @@ void CGOpenMPRuntimeNVPTX::emitSpmdEntryFooter(CodeGenFunction &CGF,
    if (!CGF.HaveInsertPoint())
      return;
  
-  emitGenericVarsEpilog(CGF);
-
    if (!EST.ExitBB)
      EST.ExitBB = CGF.createBasicBlock(".exit");
  
@@ -1464,8 +1467,7 @@ void CGOpenMPRuntimeNVPTX::emitProcBindClause(CodeGenFunction &CGF,
                                                OpenMPProcBindClauseKind ProcBind,
                                                SourceLocation Loc) {
    // Do nothing in case of Spmd mode and L0 parallel.
-  if (getExecutionMode() == CGOpenMPRuntimeNVPTX::EM_SPMD &&
-      IsInTargetMasterThreadRegion)
+  if (getExecutionMode() == CGOpenMPRuntimeNVPTX::EM_SPMD)
      return;
  
    CGOpenMPRuntime::emitProcBindClause(CGF, ProcBind, Loc);
@@ -1475,8 +1477,7 @@ void CGOpenMPRuntimeNVPTX::emitNumThreadsClause(CodeGenFunction &CGF,
                                                  llvm::Value *NumThreads,
                                                  SourceLocation Loc) {
    // Do nothing in case of Spmd mode and L0 parallel.
-  if (getExecutionMode() == CGOpenMPRuntimeNVPTX::EM_SPMD &&
-      IsInTargetMasterThreadRegion)
+  if (getExecutionMode() == CGOpenMPRuntimeNVPTX::EM_SPMD)
      return;
  
    CGOpenMPRuntime::emitNumThreadsClause(CGF, NumThreads, Loc);
@@ -1887,8 +1888,6 @@ void CGOpenMPRuntimeNVPTX::emitSpmdParallelCall(
    // Just call the outlined function to execute the parallel region.
    // OutlinedFn(&GTid, &zero, CapturedStruct);
    //
-  // TODO: Do something with IfCond when support for the 'if' clause
-  // is added on Spmd target directives.
    llvm::SmallVector<llvm::Value *, 16> OutlinedFnArgs;
  
    Address ZeroAddr = CGF.CreateMemTemp(CGF.getContext().getIntTypeForBitwidth(
diff --git a/test/OpenMP/nvptx_target_codegen.cpp b/test/OpenMP/nvptx_target_codegen.cpp

index 718c650bec6e92df12d270c146da38f0982e90df..0a8e56106ed7a68616f089c5c70e08cef3cc8ba0 100644 (file)
--- a/test/OpenMP/nvptx_target_codegen.cpp
+++ b/test/OpenMP/nvptx_target_codegen.cpp
@@ -18,7 +18,7 @@
  
  __thread int id;
  
-int baz(int f);
+int baz(int f, double &a);
  
  template<typename tx, typename ty>
  struct TT{
@@ -345,7 +345,7 @@ struct S1 {
      {
        this->a = (double)b + 1.5;
        c[1][1] = ++a;
-      baz(a);
+      baz(a, a);
      }
  
      return c[1][1] + (int)b;
@@ -367,9 +367,9 @@ int bar(int n){
    return a;
  }
  
-int baz(int f) {
+int baz(int f, double &a) {
  #pragma omp parallel
-  f = 2;
+  f = 2 + a;
    return f;
  }
  
@@ -551,7 +551,7 @@ int baz(int f) {
    // CHECK: [[EXIT]]
    // CHECK: ret void
  
-  // CHECK: define i32 [[BAZ]](i32 [[F:%.*]])
+  // CHECK: define i32 [[BAZ]](i32 [[F:%.*]], double* dereferenceable{{.*}})
    // CHECK: [[ZERO_ADDR:%.+]] = alloca i32,
    // CHECK: [[GTID:%.+]] = call i32 @__kmpc_global_thread_num(%struct.ident_t*
    // CHECK: [[GTID_ADDR:%.+]] = alloca i32,
@@ -559,13 +559,13 @@ int baz(int f) {
    // CHECK: [[PTR:%.+]] = call i8* @__kmpc_data_sharing_push_stack(i{{64|32}} 4, i16 0)
    // CHECK: [[REC_ADDR:%.+]] = bitcast i8* [[PTR]] to %struct._globalized_locals_ty*
    // CHECK: [[F_PTR:%.+]] = getelementptr inbounds %struct._globalized_locals_ty, %struct._globalized_locals_ty* [[REC_ADDR]], i32 0, i32 0
-  // CHECK: store i32 [[F]], i32* [[F_PTR]],
+  // CHECK: store i32 %{{.+}}, i32* [[F_PTR]],
    // CHECK: store i32 [[GTID]], i32* [[GTID_ADDR]],
    // CHECK: icmp eq i32
    // CHECK: br i1
  
    // CHECK: call void @__kmpc_kernel_prepare_parallel(i8* bitcast (void (i16, i32)* @{{.+}} to i8*), i16 1)
-  // CHECK: call void @__kmpc_begin_sharing_variables(i8*** [[SHARED_PTR:%.+]], i{{64|32}} 1)
+  // CHECK: call void @__kmpc_begin_sharing_variables(i8*** [[SHARED_PTR:%.+]], i{{64|32}} 2)
    // CHECK: [[SHARED:%.+]] = load i8**, i8*** [[SHARED_PTR]],
    // CHECK: [[REF:%.+]] = getelementptr inbounds i8*, i8** [[SHARED]], i{{64|32}} 0
    // CHECK: [[F_REF:%.+]] = bitcast i32* [[F_PTR]] to i8*
@@ -580,11 +580,11 @@ int baz(int f) {
    // CHECK: br i1
  
    // CHECK: call void @__kmpc_serialized_parallel(%struct.ident_t* @{{.+}}, i32 [[GTID]])
-  // CHECK: call void @__omp_outlined__(i32* [[GTID_ADDR]], i32* [[ZERO_ADDR]], i32* [[F_PTR]])
+  // CHECK: call void [[OUTLINED:@.+]](i32* [[GTID_ADDR]], i32* [[ZERO_ADDR]], i32* [[F_PTR]], double* %{{.+}})
    // CHECK: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @{{.+}}, i32 [[GTID]])
    // CHECK: br label
  
-  // CHECK: call void @__omp_outlined__(i32* [[GTID_ADDR]], i32* [[ZERO_ADDR]], i32* [[F_PTR]])
+  // CHECK: call void [[OUTLINED]](i32* [[GTID_ADDR]], i32* [[ZERO_ADDR]], i32* [[F_PTR]], double* %{{.+}})
    // CHECK: br label
  
    // CHECK: [[RES:%.+]] = load i32, i32* [[F_PTR]],
diff --git a/test/OpenMP/nvptx_target_parallel_codegen.cpp b/test/OpenMP/nvptx_target_parallel_codegen.cpp

index 64d195c43a26e2f0e11cf75d945512c90b5bc1ec..d193174ce7d91078cca5eb0e26c10bd8c0cd8136 100644 (file)
--- a/test/OpenMP/nvptx_target_parallel_codegen.cpp
+++ b/test/OpenMP/nvptx_target_parallel_codegen.cpp
@@ -55,6 +55,7 @@ int bar(int n){
  
    // CHECK-LABEL: define {{.*}}void {{@__omp_offloading_.+template.+l26}}(
    // CHECK: [[AA_ADDR:%.+]] = alloca i16*, align
+  // CHECK-NOT: call i8* @__kmpc_data_sharing_push_stack
    // CHECK: store i16* {{%.+}}, i16** [[AA_ADDR]], align
    // CHECK: [[AA:%.+]] = load i16*, i16** [[AA_ADDR]], align
    // CHECK: [[THREAD_LIMIT:%.+]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x()
diff --git a/test/OpenMP/nvptx_target_parallel_num_threads_codegen.cpp b/test/OpenMP/nvptx_target_parallel_num_threads_codegen.cpp

index 73d3bf8213447c99208b0d84e4ce048b609c49c5..13a7fb289dad4d557be4adfa7abc959324a620ed 100644 (file)
--- a/test/OpenMP/nvptx_target_parallel_num_threads_codegen.cpp
+++ b/test/OpenMP/nvptx_target_parallel_num_threads_codegen.cpp
@@ -8,9 +8,9 @@
  #ifndef HEADER
  #define HEADER
  
-// Check that the execution mode of all 2 target regions on the gpu is set to SPMD Mode.
-// CHECK-DAG: {{@__omp_offloading_.+l21}}_exec_mode = weak constant i8 0
-// CHECK-DAG: {{@__omp_offloading_.+l26}}_exec_mode = weak constant i8 0
+// Check that the execution mode of all 2 target regions on the gpu is set to non-SPMD Mode.
+// CHECK-DAG: {{@__omp_offloading_.+l21}}_exec_mode = weak constant i8 1
+// CHECK-DAG: {{@__omp_offloading_.+l26}}_exec_mode = weak constant i8 1
  
  template<typename tx>
  tx ftemplate(int n) {
@@ -46,23 +46,13 @@ int bar(int n){
    // CHECK: store i16* {{%.+}}, i16** [[AA_ADDR]], align
    // CHECK: [[AA:%.+]] = load i16*, i16** [[AA_ADDR]], align
    // CHECK: [[THREAD_LIMIT:%.+]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x()
-  // CHECK: call void @__kmpc_spmd_kernel_init(i32 [[THREAD_LIMIT]],
-  // CHECK: br label {{%?}}[[EXEC:.+]]
-  //
-  // CHECK: [[EXEC]]
-  // CHECK-NOT: call void @__kmpc_push_num_threads
-  // CHECK: {{call|invoke}} void [[OP1:@.+]]({{.+}}, {{.+}}, i16* [[AA]])
-  // CHECK: br label {{%?}}[[DONE:.+]]
-  //
-  // CHECK: [[DONE]]
-  // CHECK: call void @__kmpc_spmd_kernel_deinit()
-  // CHECK: br label {{%?}}[[EXIT:.+]]
-  //
-  // CHECK: [[EXIT]]
+  // CHECK: call void @__kmpc_kernel_init(i32
+  // CHECK: call void @__kmpc_push_num_threads
+  // CHECK: call void @__kmpc_kernel_deinit(i16 1)
    // CHECK: ret void
    // CHECK: }
  
-  // CHECK: define internal void [[OP1]](i32* noalias %.global_tid., i32* noalias %.bound_tid., i16* {{[^%]*}}[[ARG:%.+]])
+  // CHECK: define internal void @{{.+}}(i32* noalias %{{.+}}, i32* noalias %{{.+}}, i16* {{[^%]*}}[[ARG:%.+]])
    // CHECK: = alloca i32*, align
    // CHECK: = alloca i32*, align
    // CHECK: [[AA_ADDR:%.+]] = alloca i16*, align
@@ -89,23 +79,13 @@ int bar(int n){
    // CHECK: [[AA:%.+]] = load i16*, i16** [[AA_ADDR]], align
    // CHECK: [[B:%.+]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align
    // CHECK: [[THREAD_LIMIT:%.+]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x()
-  // CHECK: call void @__kmpc_spmd_kernel_init(i32 [[THREAD_LIMIT]],
-  // CHECK: br label {{%?}}[[EXEC:.+]]
-  //
-  // CHECK: [[EXEC]]
-  // CHECK-NOT: call void @__kmpc_push_num_threads
-  // CHECK: {{call|invoke}} void [[OP2:@.+]]({{.+}}, {{.+}}, i32* [[A]], i16* [[AA]], [10 x i32]* [[B]])
-  // CHECK: br label {{%?}}[[DONE:.+]]
-  //
-  // CHECK: [[DONE]]
-  // CHECK: call void @__kmpc_spmd_kernel_deinit()
-  // CHECK: br label {{%?}}[[EXIT:.+]]
-  //
-  // CHECK: [[EXIT]]
+  // CHECK: call void @__kmpc_kernel_init(i32
+  // CHECK: call void @__kmpc_push_num_threads
+  // CHECK: call void @__kmpc_kernel_deinit(i16 1)
    // CHECK: ret void
    // CHECK: }
  
-  // CHECK: define internal void [[OP2]](i32* noalias %.global_tid., i32* noalias %.bound_tid., i32* {{[^%]*}}[[ARG1:%.+]], i16* {{[^%]*}}[[ARG2:%.+]], [10 x i32]* {{[^%]*}}[[ARG3:%.+]])
+  // CHECK: define internal void @{{.+}}(i32* noalias %{{.+}}, i32* noalias %{{.+}}, i32* {{[^%]*}}[[ARG1:%.+]], i16* {{[^%]*}}[[ARG2:%.+]], [10 x i32]* {{[^%]*}}[[ARG3:%.+]])
    // CHECK: = alloca i32*, align
    // CHECK: = alloca i32*, align
    // CHECK: [[A_ADDR:%.+]] = alloca i32*, align
diff --git a/test/OpenMP/nvptx_target_teams_distribute_parallel_for_codegen.cpp b/test/OpenMP/nvptx_target_teams_distribute_parallel_for_codegen.cpp

index 66a3d46955a1452f7aa511100d8b99e7eb57366d..195f428e0fbb6bed723a44b670aece5f0f7bd9c6 100644 (file)
--- a/test/OpenMP/nvptx_target_teams_distribute_parallel_for_codegen.cpp
+++ b/test/OpenMP/nvptx_target_teams_distribute_parallel_for_codegen.cpp
@@ -45,7 +45,7 @@ tx ftemplate(int n) {
      b[i] += 1;
    }
  
-#pragma omp target teams distribute parallel for collapse(2) firstprivate(f) private(k) num_threads(M)
+#pragma omp target teams distribute parallel for collapse(2) firstprivate(f) private(k)
    for(int i = 0; i < M; i++) {
      for(int j = 0; j < M; j++) {
        k = M;
diff --git a/test/OpenMP/nvptx_target_teams_distribute_parallel_for_simd_codegen.cpp b/test/OpenMP/nvptx_target_teams_distribute_parallel_for_simd_codegen.cpp

index c508bc912fdb393861973a8dc319a6beec3839ec..051ccfe9c403aec1268fca478dd9e582696cb38e 100644 (file)
--- a/test/OpenMP/nvptx_target_teams_distribute_parallel_for_simd_codegen.cpp
+++ b/test/OpenMP/nvptx_target_teams_distribute_parallel_for_simd_codegen.cpp
@@ -43,7 +43,7 @@ tx ftemplate(int n) {
      b[i] += 1;
    }
  
-#pragma omp target teams distribute parallel for simd collapse(2) firstprivate(f) private(k) num_threads(M)
+#pragma omp target teams distribute parallel for simd collapse(2) firstprivate(f) private(k)
    for(int i = 0; i < M; i++) {
      for(int j = 0; j < M; j++) {
        k = M;
author	Alexey Bataev <a.bataev@hotmail.com>
	Tue, 15 May 2018 18:01:01 +0000 (18:01 +0000)
committer	Alexey Bataev <a.bataev@hotmail.com>
	Tue, 15 May 2018 18:01:01 +0000 (18:01 +0000)
lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp		patch \| blob \| history
test/OpenMP/nvptx_target_codegen.cpp		patch \| blob \| history
test/OpenMP/nvptx_target_parallel_codegen.cpp		patch \| blob \| history
test/OpenMP/nvptx_target_parallel_num_threads_codegen.cpp		patch \| blob \| history
test/OpenMP/nvptx_target_teams_distribute_parallel_for_codegen.cpp		patch \| blob \| history
test/OpenMP/nvptx_target_teams_distribute_parallel_for_simd_codegen.cpp		patch \| blob \| history