]> granicus.if.org Git - clang/commitdiff
[OPENMP, NVPTX] Fix globalization of the variables passed to orphaned
authorAlexey Bataev <a.bataev@hotmail.com>
Thu, 21 Jun 2018 20:26:33 +0000 (20:26 +0000)
committerAlexey Bataev <a.bataev@hotmail.com>
Thu, 21 Jun 2018 20:26:33 +0000 (20:26 +0000)
parallel region.

If the current construct requires sharing of the local variable in the
inner parallel region, this variable must be globalized to avoid
runtime crash.

git-svn-id: https://llvm.org/svn/llvm-project/cfe/trunk@335285 91177308-0d34-0410-b5e6-96231b3b80d8

lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp
lib/CodeGen/CGOpenMPRuntimeNVPTX.h
test/OpenMP/nvptx_parallel_codegen.cpp

index 9452bdea4c7bae9eab8a1741c4ec47bbe0820ad5..e3cec13f7d5448102d467a284b7156b73400ec1a 100644 (file)
@@ -184,9 +184,10 @@ class CheckVarsEscapingDeclContext final
   llvm::SetVector<const ValueDecl *> EscapedDecls;
   llvm::SetVector<const ValueDecl *> EscapedVariableLengthDecls;
   llvm::SmallPtrSet<const Decl *, 4> EscapedParameters;
-  bool AllEscaped = false;
   RecordDecl *GlobalizedRD = nullptr;
   llvm::SmallDenseMap<const ValueDecl *, const FieldDecl *> MappedDeclsFields;
+  bool AllEscaped = false;
+  bool IsForParallelRegion = false;
 
   static llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy>
   isDeclareTargetDeclaration(const ValueDecl *VD) {
@@ -207,23 +208,32 @@ class CheckVarsEscapingDeclContext final
     // Variables captured by value must be globalized.
     if (auto *CSI = CGF.CapturedStmtInfo) {
       if (const FieldDecl *FD = CSI->lookup(cast<VarDecl>(VD))) {
-        if (!FD->hasAttrs())
-          return;
-        const auto *Attr = FD->getAttr<OMPCaptureKindAttr>();
-        if (!Attr)
-          return;
-        if (!isOpenMPPrivate(
-                static_cast<OpenMPClauseKind>(Attr->getCaptureKind())) ||
-            Attr->getCaptureKind() == OMPC_map)
-          return;
-        if (FD->getType()->isReferenceType())
+        // Check if need to capture the variable that was already captured by
+        // value in the outer region.
+        if (!IsForParallelRegion) {
+          if (!FD->hasAttrs())
+            return;
+          const auto *Attr = FD->getAttr<OMPCaptureKindAttr>();
+          if (!Attr)
+            return;
+          if (!isOpenMPPrivate(
+                  static_cast<OpenMPClauseKind>(Attr->getCaptureKind())) ||
+              Attr->getCaptureKind() == OMPC_map)
+            return;
+        }
+        if (!FD->getType()->isReferenceType()) {
+          assert(!VD->getType()->isVariablyModifiedType() &&
+                 "Parameter captured by value with variably modified type");
+          EscapedParameters.insert(VD);
+        } else if (!IsForParallelRegion) {
           return;
-        assert(!VD->getType()->isVariablyModifiedType() &&
-               "Parameter captured by value with variably modified type");
-        EscapedParameters.insert(VD);
+        }
       }
-    } else if (VD->getType()->isReferenceType())
-      // Do not globalize variables with reference or pointer type.
+    }
+    if ((!CGF.CapturedStmtInfo ||
+         (IsForParallelRegion && CGF.CapturedStmtInfo)) &&
+        VD->getType()->isReferenceType())
+      // Do not globalize variables with reference type.
       return;
     if (VD->getType()->isVariablyModifiedType())
       EscapedVariableLengthDecls.insert(VD);
@@ -243,15 +253,18 @@ class CheckVarsEscapingDeclContext final
       }
     }
   }
-  void VisitOpenMPCapturedStmt(const CapturedStmt *S) {
+  void VisitOpenMPCapturedStmt(const CapturedStmt *S, bool IsParallelRegion) {
     if (!S)
       return;
     for (const CapturedStmt::Capture &C : S->captures()) {
       if (C.capturesVariable() && !C.capturesVariableByCopy()) {
         const ValueDecl *VD = C.getCapturedVar();
+        bool SavedIsParallelRegion = IsForParallelRegion;
+        IsForParallelRegion = IsParallelRegion;
         markAsEscaped(VD);
         if (isa<OMPCapturedExprDecl>(VD))
           VisitValueDecl(VD);
+        IsForParallelRegion = SavedIsParallelRegion;
       }
     }
   }
@@ -316,20 +329,19 @@ public:
   void VisitOMPExecutableDirective(const OMPExecutableDirective *D) {
     if (!D)
       return;
-    if (D->hasAssociatedStmt()) {
-      if (const auto *S =
-              dyn_cast_or_null<CapturedStmt>(D->getAssociatedStmt())) {
-        // Do not analyze directives that do not actually require capturing,
-        // like `omp for` or `omp simd` directives.
-        llvm::SmallVector<OpenMPDirectiveKind, 4> CaptureRegions;
-        getOpenMPCaptureRegions(CaptureRegions, D->getDirectiveKind());
-        if (CaptureRegions.size() == 1 &&
-            CaptureRegions.back() == OMPD_unknown) {
-          VisitStmt(S->getCapturedStmt());
-          return;
-        }
-        VisitOpenMPCapturedStmt(S);
+    if (!D->hasAssociatedStmt())
+      return;
+    if (const auto *S =
+            dyn_cast_or_null<CapturedStmt>(D->getAssociatedStmt())) {
+      // Do not analyze directives that do not actually require capturing,
+      // like `omp for` or `omp simd` directives.
+      llvm::SmallVector<OpenMPDirectiveKind, 4> CaptureRegions;
+      getOpenMPCaptureRegions(CaptureRegions, D->getDirectiveKind());
+      if (CaptureRegions.size() == 1 && CaptureRegions.back() == OMPD_unknown) {
+        VisitStmt(S->getCapturedStmt());
+        return;
       }
+      VisitOpenMPCapturedStmt(S, CaptureRegions.back() == OMPD_parallel);
     }
   }
   void VisitCapturedStmt(const CapturedStmt *S) {
@@ -551,9 +563,9 @@ static void syncParallelThreads(CodeGenFunction &CGF, llvm::Value *NumThreads) {
 /// CTA. The threads in the last warp are reserved for master execution.
 /// For the 'spmd' execution mode, all threads in a CTA are part of the team.
 static llvm::Value *getThreadLimit(CodeGenFunction &CGF,
-                                   bool IsInSpmdExecutionMode = false) {
+                                   bool IsInSPMDExecutionMode = false) {
   CGBuilderTy &Bld = CGF.Builder;
-  return IsInSpmdExecutionMode
+  return IsInSPMDExecutionMode
              ? getNVPTXNumThreads(CGF)
              : Bld.CreateNUWSub(getNVPTXNumThreads(CGF), getNVPTXWarpSize(CGF),
                                 "thread_limit");
@@ -930,7 +942,7 @@ void CGOpenMPRuntimeNVPTX::emitNonSPMDEntryFooter(CodeGenFunction &CGF,
   EST.ExitBB = nullptr;
 }
 
-void CGOpenMPRuntimeNVPTX::emitSpmdKernel(const OMPExecutableDirective &D,
+void CGOpenMPRuntimeNVPTX::emitSPMDKernel(const OMPExecutableDirective &D,
                                           StringRef ParentName,
                                           llvm::Function *&OutlinedFn,
                                           llvm::Constant *&OutlinedFnID,
@@ -951,10 +963,10 @@ void CGOpenMPRuntimeNVPTX::emitSpmdKernel(const OMPExecutableDirective &D,
                          const OMPExecutableDirective &D)
         : RT(RT), EST(EST), D(D) {}
     void Enter(CodeGenFunction &CGF) override {
-      RT.emitSpmdEntryHeader(CGF, EST, D);
+      RT.emitSPMDEntryHeader(CGF, EST, D);
     }
     void Exit(CodeGenFunction &CGF) override {
-      RT.emitSpmdEntryFooter(CGF, EST);
+      RT.emitSPMDEntryFooter(CGF, EST);
     }
   } Action(*this, EST, D);
   CodeGen.setAction(Action);
@@ -962,7 +974,7 @@ void CGOpenMPRuntimeNVPTX::emitSpmdKernel(const OMPExecutableDirective &D,
                                    IsOffloadEntry, CodeGen);
 }
 
-void CGOpenMPRuntimeNVPTX::emitSpmdEntryHeader(
+void CGOpenMPRuntimeNVPTX::emitSPMDEntryHeader(
     CodeGenFunction &CGF, EntryFunctionState &EST,
     const OMPExecutableDirective &D) {
   CGBuilderTy &Bld = CGF.Builder;
@@ -974,7 +986,7 @@ void CGOpenMPRuntimeNVPTX::emitSpmdEntryHeader(
   // Initialize the OMP state in the runtime; called by all active threads.
   // TODO: Set RequiresOMPRuntime and RequiresDataSharing parameters
   // based on code analysis of the target region.
-  llvm::Value *Args[] = {getThreadLimit(CGF, /*IsInSpmdExecutionMode=*/true),
+  llvm::Value *Args[] = {getThreadLimit(CGF, /*IsInSPMDExecutionMode=*/true),
                          /*RequiresOMPRuntime=*/Bld.getInt16(1),
                          /*RequiresDataSharing=*/Bld.getInt16(1)};
   CGF.EmitRuntimeCall(
@@ -986,7 +998,7 @@ void CGOpenMPRuntimeNVPTX::emitSpmdEntryHeader(
   IsInTargetMasterThreadRegion = true;
 }
 
-void CGOpenMPRuntimeNVPTX::emitSpmdEntryFooter(CodeGenFunction &CGF,
+void CGOpenMPRuntimeNVPTX::emitSPMDEntryFooter(CodeGenFunction &CGF,
                                                EntryFunctionState &EST) {
   IsInTargetMasterThreadRegion = false;
   if (!CGF.HaveInsertPoint())
@@ -1465,7 +1477,7 @@ void CGOpenMPRuntimeNVPTX::emitTargetOutlinedFunction(
 
   bool Mode = supportsSPMDExecutionMode(CGM.getContext(), D);
   if (Mode)
-    emitSpmdKernel(D, ParentName, OutlinedFn, OutlinedFnID, IsOffloadEntry,
+    emitSPMDKernel(D, ParentName, OutlinedFn, OutlinedFnID, IsOffloadEntry,
                    CodeGen);
   else
     emitNonSPMDKernel(D, ParentName, OutlinedFn, OutlinedFnID, IsOffloadEntry,
@@ -1483,7 +1495,7 @@ CGOpenMPRuntimeNVPTX::CGOpenMPRuntimeNVPTX(CodeGenModule &CGM)
 void CGOpenMPRuntimeNVPTX::emitProcBindClause(CodeGenFunction &CGF,
                                               OpenMPProcBindClauseKind ProcBind,
                                               SourceLocation Loc) {
-  // Do nothing in case of Spmd mode and L0 parallel.
+  // Do nothing in case of SPMD mode and L0 parallel.
   if (getExecutionMode() == CGOpenMPRuntimeNVPTX::EM_SPMD)
     return;
 
@@ -1493,7 +1505,7 @@ void CGOpenMPRuntimeNVPTX::emitProcBindClause(CodeGenFunction &CGF,
 void CGOpenMPRuntimeNVPTX::emitNumThreadsClause(CodeGenFunction &CGF,
                                                 llvm::Value *NumThreads,
                                                 SourceLocation Loc) {
-  // Do nothing in case of Spmd mode and L0 parallel.
+  // Do nothing in case of SPMD mode and L0 parallel.
   if (getExecutionMode() == CGOpenMPRuntimeNVPTX::EM_SPMD)
     return;
 
@@ -1718,7 +1730,7 @@ void CGOpenMPRuntimeNVPTX::emitParallelCall(
     return;
 
   if (getExecutionMode() == CGOpenMPRuntimeNVPTX::EM_SPMD)
-    emitSpmdParallelCall(CGF, Loc, OutlinedFn, CapturedVars, IfCond);
+    emitSPMDParallelCall(CGF, Loc, OutlinedFn, CapturedVars, IfCond);
   else
     emitNonSPMDParallelCall(CGF, Loc, OutlinedFn, CapturedVars, IfCond);
 }
@@ -1904,7 +1916,7 @@ void CGOpenMPRuntimeNVPTX::emitNonSPMDParallelCall(
   }
 }
 
-void CGOpenMPRuntimeNVPTX::emitSpmdParallelCall(
+void CGOpenMPRuntimeNVPTX::emitSPMDParallelCall(
     CodeGenFunction &CGF, SourceLocation Loc, llvm::Value *OutlinedFn,
     ArrayRef<llvm::Value *> CapturedVars, const Expr *IfCond) {
   // Just call the outlined function to execute the parallel region.
index c7d647bfdb1171fad3ff6a97829430bf6238325e..f83e99f8a3b727ec795885bc3c31b42381f9eca8 100644 (file)
@@ -77,12 +77,12 @@ private:
   /// Helper for generic variables globalization epilog.
   void emitGenericVarsEpilog(CodeGenFunction &CGF);
 
-  /// Helper for Spmd mode target directive's entry function.
-  void emitSpmdEntryHeader(CodeGenFunction &CGF, EntryFunctionState &EST,
+  /// Helper for SPMD mode target directive's entry function.
+  void emitSPMDEntryHeader(CodeGenFunction &CGF, EntryFunctionState &EST,
                            const OMPExecutableDirective &D);
 
-  /// Signal termination of Spmd mode execution.
-  void emitSpmdEntryFooter(CodeGenFunction &CGF, EntryFunctionState &EST);
+  /// Signal termination of SPMD mode execution.
+  void emitSPMDEntryFooter(CodeGenFunction &CGF, EntryFunctionState &EST);
 
   //
   // Base class overrides.
@@ -119,7 +119,7 @@ private:
   /// \param CodeGen Object containing the target statements.
   /// An outlined function may not be an entry if, e.g. the if clause always
   /// evaluates to false.
-  void emitSpmdKernel(const OMPExecutableDirective &D, StringRef ParentName,
+  void emitSPMDKernel(const OMPExecutableDirective &D, StringRef ParentName,
                       llvm::Function *&OutlinedFn,
                       llvm::Constant *&OutlinedFnID, bool IsOffloadEntry,
                       const RegionCodeGenTy &CodeGen);
@@ -166,7 +166,7 @@ private:
   /// \param IfCond Condition in the associated 'if' clause, if it was
   /// specified, nullptr otherwise.
   ///
-  void emitSpmdParallelCall(CodeGenFunction &CGF, SourceLocation Loc,
+  void emitSPMDParallelCall(CodeGenFunction &CGF, SourceLocation Loc,
                             llvm::Value *OutlinedFn,
                             ArrayRef<llvm::Value *> CapturedVars,
                             const Expr *IfCond);
index 8f496eb80e4affef159f7197dd7bf2c9bd09d110..d1a3104407d0987600a97656856934121dca4d01 100644 (file)
@@ -315,6 +315,15 @@ int bar(int n){
 
 // CHECK-LABEL: define {{.*}}void {{@__omp_offloading_.+template.+l54}}_worker()
 // CHECK-LABEL: define {{.*}}void {{@__omp_offloading_.+template.+l54}}(
+// CHECK-32: [[A_ADDR:%.+]] = alloca i32,
+// CHECK-64: [[A_ADDR:%.+]] = alloca i64,
+// CHECK-64: [[CONV:%.+]] = bitcast i64* [[A_ADDR]] to i32*
+// CHECK: [[STACK:%.+]] = call i8* @__kmpc_data_sharing_push_stack(i{{64|32}} 4, i16 0)
+// CHECK: [[BC:%.+]] = bitcast i8* [[STACK]] to %struct._globalized_locals_ty*
+// CHECK-32: [[A:%.+]] = load i32, i32* [[A_ADDR]],
+// CHECK-64: [[A:%.+]] = load i32, i32* [[CONV]],
+// CHECK: [[GLOBAL_A_ADDR:%.+]] = getelementptr inbounds %struct._globalized_locals_ty, %struct._globalized_locals_ty* [[BC]], i{{[0-9]+}} 0, i{{[0-9]+}} 0
+// CHECK: store i32 [[A]], i32* [[GLOBAL_A_ADDR]],
 
 // CHECK-LABEL: define internal void @{{.+}}(i32* noalias %{{.+}}, i32* noalias %{{.+}}, i32* dereferenceable{{.*}})
 // CHECK:  [[CC:%.+]] = alloca i32,