From: Alexey Bataev Date: Tue, 4 Dec 2018 15:03:25 +0000 (+0000) Subject: [OPENMP][NVPTX]Mark __kmpc_barrier functions as convergent. X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=46d192daa59e1c252a74dbd398c3a07f278979bb;p=clang [OPENMP][NVPTX]Mark __kmpc_barrier functions as convergent. __kmpc_barrier runtime functions must be marked as convergent to prevent some dangerous optimizations. Also, for NVPTX target all barriers must be emitted as simple barriers. git-svn-id: https://llvm.org/svn/llvm-project/cfe/trunk@348271 91177308-0d34-0410-b5e6-96231b3b80d8 --- diff --git a/lib/CodeGen/CGOpenMPRuntime.cpp b/lib/CodeGen/CGOpenMPRuntime.cpp index cade093941..66f0783e27 100644 --- a/lib/CodeGen/CGOpenMPRuntime.cpp +++ b/lib/CodeGen/CGOpenMPRuntime.cpp @@ -3214,13 +3214,7 @@ void CGOpenMPRuntime::emitOrderedRegion(CodeGenFunction &CGF, emitInlinedDirective(CGF, OMPD_ordered, OrderedOpGen); } -void CGOpenMPRuntime::emitBarrierCall(CodeGenFunction &CGF, SourceLocation Loc, - OpenMPDirectiveKind Kind, bool EmitChecks, - bool ForceSimpleCall) { - if (!CGF.HaveInsertPoint()) - return; - // Build call __kmpc_cancel_barrier(loc, thread_id); - // Build call __kmpc_barrier(loc, thread_id); +unsigned CGOpenMPRuntime::getDefaultFlagsForBarriers(OpenMPDirectiveKind Kind) { unsigned Flags; if (Kind == OMPD_for) Flags = OMP_IDENT_BARRIER_IMPL_FOR; @@ -3232,6 +3226,17 @@ void CGOpenMPRuntime::emitBarrierCall(CodeGenFunction &CGF, SourceLocation Loc, Flags = OMP_IDENT_BARRIER_EXPL; else Flags = OMP_IDENT_BARRIER_IMPL; + return Flags; +} + +void CGOpenMPRuntime::emitBarrierCall(CodeGenFunction &CGF, SourceLocation Loc, + OpenMPDirectiveKind Kind, bool EmitChecks, + bool ForceSimpleCall) { + if (!CGF.HaveInsertPoint()) + return; + // Build call __kmpc_cancel_barrier(loc, thread_id); + // Build call __kmpc_barrier(loc, thread_id); + unsigned Flags = getDefaultFlagsForBarriers(Kind); // Build call __kmpc_cancel_barrier(loc, thread_id) or __kmpc_barrier(loc, // thread_id); llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc, Flags), diff --git a/lib/CodeGen/CGOpenMPRuntime.h b/lib/CodeGen/CGOpenMPRuntime.h index d332623414..f4a8a97970 100644 --- a/lib/CodeGen/CGOpenMPRuntime.h +++ b/lib/CodeGen/CGOpenMPRuntime.h @@ -290,6 +290,10 @@ protected: /// default location. virtual unsigned getDefaultLocationReserved2Flags() const { return 0; } + /// Returns default flags for the barriers depending on the directive, for + /// which this barier is going to be emitted. + static unsigned getDefaultFlagsForBarriers(OpenMPDirectiveKind Kind); + /// Get the LLVM type for the critical name. llvm::ArrayType *getKmpCriticalNameTy() const {return KmpCriticalNameTy;} diff --git a/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp b/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp index 2bc98f6178..cf814a4b20 100644 --- a/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp +++ b/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp @@ -96,6 +96,8 @@ enum OpenMPRTLFunctionNVPTX { OMPRTL_NVPTX__kmpc_get_team_static_memory, /// Call to void __kmpc_restore_team_static_memory(int16_t is_shared); OMPRTL_NVPTX__kmpc_restore_team_static_memory, + // Call to void __kmpc_barrier(ident_t *loc, kmp_int32 global_tid); + OMPRTL__kmpc_barrier, }; /// Pre(post)-action for different OpenMP constructs specialized for NVPTX. @@ -1824,6 +1826,15 @@ CGOpenMPRuntimeNVPTX::createNVPTXRuntimeFunction(unsigned Function) { CGM.CreateRuntimeFunction(FnTy, "__kmpc_restore_team_static_memory"); break; } + case OMPRTL__kmpc_barrier: { + // Build void __kmpc_barrier(ident_t *loc, kmp_int32 global_tid); + llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; + auto *FnTy = + llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); + RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name*/ "__kmpc_barrier"); + cast(RTLFn)->addFnAttr(llvm::Attribute::Convergent); + break; + } } return RTLFn; } @@ -2676,6 +2687,20 @@ void CGOpenMPRuntimeNVPTX::emitSPMDParallelCall( } } +void CGOpenMPRuntimeNVPTX::emitBarrierCall(CodeGenFunction &CGF, + SourceLocation Loc, + OpenMPDirectiveKind Kind, bool, + bool) { + // Always emit simple barriers! + if (!CGF.HaveInsertPoint()) + return; + // Build call __kmpc_cancel_barrier(loc, thread_id); + unsigned Flags = getDefaultFlagsForBarriers(Kind); + llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc, Flags), + getThreadID(CGF, Loc)}; + CGF.EmitRuntimeCall(createNVPTXRuntimeFunction(OMPRTL__kmpc_barrier), Args); +} + void CGOpenMPRuntimeNVPTX::emitCriticalRegion( CodeGenFunction &CGF, StringRef CriticalName, const RegionCodeGenTy &CriticalOpGen, SourceLocation Loc, diff --git a/lib/CodeGen/CGOpenMPRuntimeNVPTX.h b/lib/CodeGen/CGOpenMPRuntimeNVPTX.h index 8ba2759c8b..8fb3b0a061 100644 --- a/lib/CodeGen/CGOpenMPRuntimeNVPTX.h +++ b/lib/CodeGen/CGOpenMPRuntimeNVPTX.h @@ -274,6 +274,18 @@ public: ArrayRef CapturedVars, const Expr *IfCond) override; + /// Emit an implicit/explicit barrier for OpenMP threads. + /// \param Kind Directive for which this implicit barrier call must be + /// generated. Must be OMPD_barrier for explicit barrier generation. + /// \param EmitChecks true if need to emit checks for cancellation barriers. + /// \param ForceSimpleCall true simple barrier call must be emitted, false if + /// runtime class decides which one to emit (simple or with cancellation + /// checks). + /// + void emitBarrierCall(CodeGenFunction &CGF, SourceLocation Loc, + OpenMPDirectiveKind Kind, bool EmitChecks = true, + bool ForceSimpleCall = false) override; + /// Emits a critical region. /// \param CriticalName Name of the critical region. /// \param CriticalOpGen Generator for the statement associated with the given diff --git a/test/OpenMP/nvptx_parallel_codegen.cpp b/test/OpenMP/nvptx_parallel_codegen.cpp index 2fd837c92b..08431fccc0 100644 --- a/test/OpenMP/nvptx_parallel_codegen.cpp +++ b/test/OpenMP/nvptx_parallel_codegen.cpp @@ -45,6 +45,7 @@ tx ftemplate(int n) { #pragma omp parallel if(n>1000) { int a = 45; +#pragma omp barrier } a += 1; aa += 1; @@ -317,10 +318,13 @@ int bar(int n){ // CHECK: define internal void [[PARALLEL_FN4]]( // CHECK: [[A:%.+]] = alloca i[[SZ:32|64]], // CHECK: store i[[SZ]] 45, i[[SZ]]* %a, +// CHECK: call void @__kmpc_barrier(%struct.ident_t* @{{.+}}, i32 %{{.+}}) // CHECK: ret void -// CHECK-LABEL: define {{.*}}void {{@__omp_offloading_.+template.+l54}}_worker() -// CHECK-LABEL: define {{.*}}void {{@__omp_offloading_.+template.+l54}}( +// CHECK: declare void @__kmpc_barrier(%struct.ident_t*, i32) #[[BARRIER_ATTRS:.+]] + +// CHECK-LABEL: define {{.*}}void {{@__omp_offloading_.+template.+l55}}_worker() +// CHECK-LABEL: define {{.*}}void {{@__omp_offloading_.+template.+l55}}( // CHECK-32: [[A_ADDR:%.+]] = alloca i32, // CHECK-64: [[A_ADDR:%.+]] = alloca i64, // CHECK-64: [[CONV:%.+]] = bitcast i64* [[A_ADDR]] to i32* @@ -357,4 +361,6 @@ int bar(int n){ // CHECK: store i32 [[NEW_CC_VAL]], i32* [[CC]], // CHECK: br label +// CHECK: attributes #[[BARRIER_ATTRS]] = {{.*}} convergent {{.*}} + #endif