From f925ecb766d27b702797a96e904379145d86f334 Mon Sep 17 00:00:00 2001 From: Alexey Bataev Date: Mon, 14 Aug 2017 15:01:03 +0000 Subject: [PATCH] [OPENMP] Generalization of calls of the outlined functions. General improvement of the outlined functions calls. git-svn-id: https://llvm.org/svn/llvm-project/cfe/trunk@310840 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/CodeGen/CGOpenMPRuntime.cpp | 46 +++++++++++++++++----------- lib/CodeGen/CGOpenMPRuntime.h | 8 ++++- lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp | 46 +++++++++++++--------------- lib/CodeGen/CGOpenMPRuntimeNVPTX.h | 2 +- lib/CodeGen/CGStmtOpenMP.cpp | 12 +++++--- test/OpenMP/parallel_codegen.cpp | 2 +- 6 files changed, 65 insertions(+), 51 deletions(-) diff --git a/lib/CodeGen/CGOpenMPRuntime.cpp b/lib/CodeGen/CGOpenMPRuntime.cpp index 870c141037..afbeb183e8 100644 --- a/lib/CodeGen/CGOpenMPRuntime.cpp +++ b/lib/CodeGen/CGOpenMPRuntime.cpp @@ -2447,7 +2447,7 @@ void CGOpenMPRuntime::emitParallelCall(CodeGenFunction &CGF, SourceLocation Loc, OutlinedFnArgs.push_back(ThreadIDAddr.getPointer()); OutlinedFnArgs.push_back(ZeroAddr.getPointer()); OutlinedFnArgs.append(CapturedVars.begin(), CapturedVars.end()); - RT.emitOutlinedFunctionCall(CGF, OutlinedFn, OutlinedFnArgs); + RT.emitOutlinedFunctionCall(CGF, Loc, OutlinedFn, OutlinedFnArgs); // __kmpc_end_serialized_parallel(&Loc, GTid); llvm::Value *EndArgs[] = {RT.emitUpdateLocation(CGF, Loc), ThreadID}; @@ -3348,14 +3348,14 @@ CGOpenMPRuntime::createOffloadingBinaryDescriptorRegistration() { auto *UnRegFn = createOffloadingBinaryDescriptorFunction( CGM, ".omp_offloading.descriptor_unreg", [&](CodeGenFunction &CGF, PrePostActionTy &) { - CGF.EmitCallOrInvoke(createRuntimeFunction(OMPRTL__tgt_unregister_lib), - Desc); + CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__tgt_unregister_lib), + Desc); }); auto *RegFn = createOffloadingBinaryDescriptorFunction( CGM, ".omp_offloading.descriptor_reg", [&](CodeGenFunction &CGF, PrePostActionTy &) { - CGF.EmitCallOrInvoke(createRuntimeFunction(OMPRTL__tgt_register_lib), - Desc); + CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__tgt_register_lib), + Desc); CGM.getCXXABI().registerGlobalDtor(CGF, RegUnregVar, UnRegFn, Desc); }); if (CGM.supportsCOMDAT()) { @@ -3859,7 +3859,8 @@ emitProxyTaskFunction(CodeGenModule &CGM, SourceLocation Loc, } CallArgs.push_back(SharedsParam); - CGM.getOpenMPRuntime().emitOutlinedFunctionCall(CGF, TaskFunction, CallArgs); + CGM.getOpenMPRuntime().emitOutlinedFunctionCall(CGF, Loc, TaskFunction, + CallArgs); CGF.EmitStoreThroughLValue( RValue::get(CGF.Builder.getInt32(/*C=*/0)), CGF.MakeAddrLValue(CGF.ReturnValue, KmpInt32Ty)); @@ -4534,8 +4535,8 @@ void CGOpenMPRuntime::emitTaskCall(CodeGenFunction &CGF, SourceLocation Loc, DepWaitTaskArgs[5] = llvm::ConstantPointerNull::get(CGF.VoidPtrTy); } auto &&ElseCodeGen = [&TaskArgs, ThreadID, NewTaskNewTaskTTy, TaskEntry, - NumDependencies, &DepWaitTaskArgs](CodeGenFunction &CGF, - PrePostActionTy &) { + NumDependencies, &DepWaitTaskArgs, + Loc](CodeGenFunction &CGF, PrePostActionTy &) { auto &RT = CGF.CGM.getOpenMPRuntime(); CodeGenFunction::RunCleanupsScope LocalScope(CGF); // Build void __kmpc_omp_wait_deps(ident_t *, kmp_int32 gtid, @@ -4546,11 +4547,11 @@ void CGOpenMPRuntime::emitTaskCall(CodeGenFunction &CGF, SourceLocation Loc, CGF.EmitRuntimeCall(RT.createRuntimeFunction(OMPRTL__kmpc_omp_wait_deps), DepWaitTaskArgs); // Call proxy_task_entry(gtid, new_task); - auto &&CodeGen = [TaskEntry, ThreadID, NewTaskNewTaskTTy]( - CodeGenFunction &CGF, PrePostActionTy &Action) { + auto &&CodeGen = [TaskEntry, ThreadID, NewTaskNewTaskTTy, + Loc](CodeGenFunction &CGF, PrePostActionTy &Action) { Action.Enter(CGF); llvm::Value *OutlinedFnArgs[] = {ThreadID, NewTaskNewTaskTTy}; - CGF.CGM.getOpenMPRuntime().emitOutlinedFunctionCall(CGF, TaskEntry, + CGF.CGM.getOpenMPRuntime().emitOutlinedFunctionCall(CGF, Loc, TaskEntry, OutlinedFnArgs); }; @@ -7035,7 +7036,7 @@ void CGOpenMPRuntime::emitTargetCall(CodeGenFunction &CGF, CGF.Builder.CreateCondBr(Failed, OffloadFailedBlock, OffloadContBlock); CGF.EmitBlock(OffloadFailedBlock); - emitOutlinedFunctionCall(CGF, OutlinedFn, KernelArgs); + emitOutlinedFunctionCall(CGF, D.getLocStart(), OutlinedFn, KernelArgs); CGF.EmitBranch(OffloadContBlock); CGF.EmitBlock(OffloadContBlock, /*IsFinished=*/true); @@ -7755,16 +7756,25 @@ void CGOpenMPRuntime::emitDoacrossOrdered(CodeGenFunction &CGF, CGF.EmitRuntimeCall(RTLFn, Args); } -void CGOpenMPRuntime::emitOutlinedFunctionCall( - CodeGenFunction &CGF, llvm::Value *OutlinedFn, - ArrayRef Args) const { - if (auto *Fn = dyn_cast(OutlinedFn)) { +void CGOpenMPRuntime::emitCall(CodeGenFunction &CGF, llvm::Value *Callee, + ArrayRef Args, + SourceLocation Loc) const { + auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, Loc); + + if (auto *Fn = dyn_cast(Callee)) { if (Fn->doesNotThrow()) { - CGF.EmitNounwindRuntimeCall(OutlinedFn, Args); + CGF.EmitNounwindRuntimeCall(Fn, Args); return; } } - CGF.EmitRuntimeCall(OutlinedFn, Args); + CGF.EmitRuntimeCall(Callee, Args); +} + +void CGOpenMPRuntime::emitOutlinedFunctionCall( + CodeGenFunction &CGF, SourceLocation Loc, llvm::Value *OutlinedFn, + ArrayRef Args) const { + assert(Loc.isValid() && "Outlined function call location must be valid."); + emitCall(CGF, OutlinedFn, Args, Loc); } Address CGOpenMPRuntime::getParameterAddress(CodeGenFunction &CGF, diff --git a/lib/CodeGen/CGOpenMPRuntime.h b/lib/CodeGen/CGOpenMPRuntime.h index d28f850867..30e37a8f12 100644 --- a/lib/CodeGen/CGOpenMPRuntime.h +++ b/lib/CodeGen/CGOpenMPRuntime.h @@ -250,6 +250,11 @@ protected: // virtual StringRef getOutlinedHelperName() const { return ".omp_outlined."; } + /// Emits \p Callee function call with arguments \p Args with location \p Loc. + void emitCall(CodeGenFunction &CGF, llvm::Value *Callee, + ArrayRef Args = llvm::None, + SourceLocation Loc = SourceLocation()) const; + private: /// \brief Default const ident_t object used for initialization of all other /// ident_t objects. @@ -1345,7 +1350,8 @@ public: /// Emits call of the outlined function with the provided arguments, /// translating these arguments to correct target-specific arguments. virtual void - emitOutlinedFunctionCall(CodeGenFunction &CGF, llvm::Value *OutlinedFn, + emitOutlinedFunctionCall(CodeGenFunction &CGF, SourceLocation Loc, + llvm::Value *OutlinedFn, ArrayRef Args = llvm::None) const; }; diff --git a/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp b/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp index 867d1e8f7c..fca0a50ca1 100644 --- a/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp +++ b/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp @@ -150,20 +150,18 @@ enum NamedBarrier : unsigned { /// Get the GPU warp size. static llvm::Value *getNVPTXWarpSize(CodeGenFunction &CGF) { - CGBuilderTy &Bld = CGF.Builder; - return Bld.CreateCall( + return CGF.EmitRuntimeCall( llvm::Intrinsic::getDeclaration( &CGF.CGM.getModule(), llvm::Intrinsic::nvvm_read_ptx_sreg_warpsize), - llvm::None, "nvptx_warp_size"); + "nvptx_warp_size"); } /// Get the id of the current thread on the GPU. static llvm::Value *getNVPTXThreadID(CodeGenFunction &CGF) { - CGBuilderTy &Bld = CGF.Builder; - return Bld.CreateCall( + return CGF.EmitRuntimeCall( llvm::Intrinsic::getDeclaration( &CGF.CGM.getModule(), llvm::Intrinsic::nvvm_read_ptx_sreg_tid_x), - llvm::None, "nvptx_tid"); + "nvptx_tid"); } /// Get the id of the warp in the block. @@ -185,17 +183,15 @@ static llvm::Value *getNVPTXLaneID(CodeGenFunction &CGF) { /// Get the maximum number of threads in a block of the GPU. static llvm::Value *getNVPTXNumThreads(CodeGenFunction &CGF) { - CGBuilderTy &Bld = CGF.Builder; - return Bld.CreateCall( + return CGF.EmitRuntimeCall( llvm::Intrinsic::getDeclaration( &CGF.CGM.getModule(), llvm::Intrinsic::nvvm_read_ptx_sreg_ntid_x), - llvm::None, "nvptx_num_threads"); + "nvptx_num_threads"); } /// Get barrier to synchronize all threads in a block. static void getNVPTXCTABarrier(CodeGenFunction &CGF) { - CGBuilderTy &Bld = CGF.Builder; - Bld.CreateCall(llvm::Intrinsic::getDeclaration( + CGF.EmitRuntimeCall(llvm::Intrinsic::getDeclaration( &CGF.CGM.getModule(), llvm::Intrinsic::nvvm_barrier0)); } @@ -205,9 +201,9 @@ static void getNVPTXBarrier(CodeGenFunction &CGF, int ID, llvm::Value *NumThreads) { CGBuilderTy &Bld = CGF.Builder; llvm::Value *Args[] = {Bld.getInt32(ID), NumThreads}; - Bld.CreateCall(llvm::Intrinsic::getDeclaration(&CGF.CGM.getModule(), - llvm::Intrinsic::nvvm_barrier), - Args); + CGF.EmitRuntimeCall(llvm::Intrinsic::getDeclaration( + &CGF.CGM.getModule(), llvm::Intrinsic::nvvm_barrier), + Args); } /// Synchronize all GPU threads in a block. @@ -345,7 +341,7 @@ void CGOpenMPRuntimeNVPTX::emitGenericEntryHeader(CodeGenFunction &CGF, Bld.CreateCondBr(IsWorker, WorkerBB, MasterCheckBB); CGF.EmitBlock(WorkerBB); - emitOutlinedFunctionCall(CGF, WST.WorkerFn); + emitCall(CGF, WST.WorkerFn); CGF.EmitBranch(EST.ExitBB); CGF.EmitBlock(MasterCheckBB); @@ -555,7 +551,7 @@ void CGOpenMPRuntimeNVPTX::emitWorkerLoop(CodeGenFunction &CGF, CGF.CreateDefaultAlignTempAlloca(CGF.Int32Ty, /*Name=*/".zero.addr"); CGF.InitTempAlloca(ZeroAddr, CGF.Builder.getInt32(/*C=*/0)); llvm::Value *FnArgs[] = {ZeroAddr.getPointer(), ZeroAddr.getPointer()}; - emitOutlinedFunctionCall(CGF, Fn, FnArgs); + emitCall(CGF, Fn, FnArgs); // Go to end of parallel region. CGF.EmitBranch(TerminateBB); @@ -883,7 +879,7 @@ void CGOpenMPRuntimeNVPTX::emitTeamsCall(CodeGenFunction &CGF, OutlinedFnArgs.push_back(ZeroAddr.getPointer()); OutlinedFnArgs.push_back(ZeroAddr.getPointer()); OutlinedFnArgs.append(CapturedVars.begin(), CapturedVars.end()); - emitOutlinedFunctionCall(CGF, OutlinedFn, OutlinedFnArgs); + emitOutlinedFunctionCall(CGF, Loc, OutlinedFn, OutlinedFnArgs); } void CGOpenMPRuntimeNVPTX::emitParallelCall( @@ -932,10 +928,10 @@ void CGOpenMPRuntimeNVPTX::emitGenericParallelCall( auto *ThreadID = getThreadID(CGF, Loc); llvm::Value *Args[] = {RTLoc, ThreadID}; - auto &&SeqGen = [this, Fn, &CapturedVars, &Args](CodeGenFunction &CGF, - PrePostActionTy &) { - auto &&CodeGen = [this, Fn, &CapturedVars](CodeGenFunction &CGF, - PrePostActionTy &Action) { + auto &&SeqGen = [this, Fn, &CapturedVars, &Args, Loc](CodeGenFunction &CGF, + PrePostActionTy &) { + auto &&CodeGen = [this, Fn, &CapturedVars, Loc](CodeGenFunction &CGF, + PrePostActionTy &Action) { Action.Enter(CGF); llvm::SmallVector OutlinedFnArgs; @@ -944,7 +940,7 @@ void CGOpenMPRuntimeNVPTX::emitGenericParallelCall( OutlinedFnArgs.push_back( llvm::ConstantPointerNull::get(CGM.Int32Ty->getPointerTo())); OutlinedFnArgs.append(CapturedVars.begin(), CapturedVars.end()); - emitOutlinedFunctionCall(CGF, Fn, OutlinedFnArgs); + emitOutlinedFunctionCall(CGF, Loc, Fn, OutlinedFnArgs); }; RegionCodeGenTy RCG(CodeGen); @@ -980,7 +976,7 @@ void CGOpenMPRuntimeNVPTX::emitSpmdParallelCall( OutlinedFnArgs.push_back( llvm::ConstantPointerNull::get(CGM.Int32Ty->getPointerTo())); OutlinedFnArgs.append(CapturedVars.begin(), CapturedVars.end()); - emitOutlinedFunctionCall(CGF, OutlinedFn, OutlinedFnArgs); + emitOutlinedFunctionCall(CGF, Loc, OutlinedFn, OutlinedFnArgs); } /// This function creates calls to one of two shuffle functions to copy @@ -2296,7 +2292,7 @@ CGOpenMPRuntimeNVPTX::getParameterAddress(CodeGenFunction &CGF, } void CGOpenMPRuntimeNVPTX::emitOutlinedFunctionCall( - CodeGenFunction &CGF, llvm::Value *OutlinedFn, + CodeGenFunction &CGF, SourceLocation Loc, llvm::Value *OutlinedFn, ArrayRef Args) const { SmallVector TargetArgs; auto *FnType = @@ -2314,5 +2310,5 @@ void CGOpenMPRuntimeNVPTX::emitOutlinedFunctionCall( TargetArgs.emplace_back( CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(TargetArg, TargetType)); } - CGOpenMPRuntime::emitOutlinedFunctionCall(CGF, OutlinedFn, TargetArgs); + CGOpenMPRuntime::emitOutlinedFunctionCall(CGF, Loc, OutlinedFn, TargetArgs); } diff --git a/lib/CodeGen/CGOpenMPRuntimeNVPTX.h b/lib/CodeGen/CGOpenMPRuntimeNVPTX.h index f7b9264680..a9d6386e6b 100644 --- a/lib/CodeGen/CGOpenMPRuntimeNVPTX.h +++ b/lib/CodeGen/CGOpenMPRuntimeNVPTX.h @@ -285,7 +285,7 @@ public: /// Emits call of the outlined function with the provided arguments, /// translating these arguments to correct target-specific arguments. void emitOutlinedFunctionCall( - CodeGenFunction &CGF, llvm::Value *OutlinedFn, + CodeGenFunction &CGF, SourceLocation Loc, llvm::Value *OutlinedFn, ArrayRef Args = llvm::None) const override; /// Target codegen is specialized based on two programming models: the diff --git a/lib/CodeGen/CGStmtOpenMP.cpp b/lib/CodeGen/CGStmtOpenMP.cpp index 4b3a2018e2..c7e74b2c57 100644 --- a/lib/CodeGen/CGStmtOpenMP.cpp +++ b/lib/CodeGen/CGStmtOpenMP.cpp @@ -473,7 +473,6 @@ CodeGenFunction::GenerateOpenMPCapturedStmtFunction(const CapturedStmt &S) { /*RegisterCastedArgsOnly=*/true, CapturedStmtInfo->getHelperName()); CodeGenFunction WrapperCGF(CGM, /*suppressNewContext=*/true); - WrapperCGF.disableDebugInfo(); Args.clear(); LocalAddrs.clear(); VLASizes.clear(); @@ -501,7 +500,8 @@ CodeGenFunction::GenerateOpenMPCapturedStmtFunction(const CapturedStmt &S) { } CallArgs.emplace_back(CallArg); } - CGM.getOpenMPRuntime().emitOutlinedFunctionCall(WrapperCGF, F, CallArgs); + CGM.getOpenMPRuntime().emitOutlinedFunctionCall(WrapperCGF, S.getLocStart(), + F, CallArgs); WrapperCGF.FinishFunction(); return WrapperF; } @@ -2749,6 +2749,7 @@ void CodeGenFunction::EmitOMPTaskBasedDirective(const OMPExecutableDirective &S, OMPPrivateScope Scope(CGF); if (!Data.PrivateVars.empty() || !Data.FirstprivateVars.empty() || !Data.LastprivateVars.empty()) { + enum { PrivatesParam = 2, CopyFnParam = 3 }; auto *CopyFn = CGF.Builder.CreateLoad( CGF.GetAddrOfLocalVar(CS->getCapturedDecl()->getParam(3))); auto *PrivatesPtr = CGF.Builder.CreateLoad( @@ -2780,7 +2781,8 @@ void CodeGenFunction::EmitOMPTaskBasedDirective(const OMPExecutableDirective &S, PrivatePtrs.push_back(std::make_pair(VD, PrivatePtr)); CallArgs.push_back(PrivatePtr.getPointer()); } - CGF.EmitRuntimeCall(CopyFn, CallArgs); + CGF.CGM.getOpenMPRuntime().emitOutlinedFunctionCall(CGF, S.getLocStart(), + CopyFn, CallArgs); for (auto &&Pair : LastprivateDstsOrigs) { auto *OrigVD = cast(Pair.second->getDecl()); DeclRefExpr DRE( @@ -3170,8 +3172,8 @@ void CodeGenFunction::EmitOMPOrderedDirective(const OMPOrderedDirective &S) { llvm::SmallVector CapturedVars; CGF.GenerateOpenMPCapturedVars(*CS, CapturedVars); auto *OutlinedFn = emitOutlinedOrderedFunction(CGM, CS); - CGM.getOpenMPRuntime().emitOutlinedFunctionCall(CGF, OutlinedFn, - CapturedVars); + CGM.getOpenMPRuntime().emitOutlinedFunctionCall(CGF, S.getLocStart(), + OutlinedFn, CapturedVars); } else { Action.Enter(CGF); CGF.EmitStmt( diff --git a/test/OpenMP/parallel_codegen.cpp b/test/OpenMP/parallel_codegen.cpp index 8bf265c2de..68a178e2c3 100644 --- a/test/OpenMP/parallel_codegen.cpp +++ b/test/OpenMP/parallel_codegen.cpp @@ -122,7 +122,7 @@ int main (int argc, char **argv) { // CHECK: define linkonce_odr {{.*}}void [[FOO1]](i8** %argc) // CHECK-DEBUG-DAG: define linkonce_odr void [[FOO1]](i8** %argc) // CHECK-DEBUG-DAG: define internal void [[OMP_OUTLINED]](i32* noalias %.global_tid., i32* noalias %.bound_tid., i8*** dereferenceable({{4|8}}) %argc) -// CHECK-DEBUG-DAG: call void [[OMP_OUTLINED_DEBUG]] +// CHECK-DEBUG-DAG: call void [[OMP_OUTLINED_DEBUG]]({{[^)]+}}){{[^,]*}}, !dbg // CHECK: attributes #[[FN_ATTRS]] = {{.+}} nounwind // CHECK-DEBUG: attributes #[[FN_ATTRS]] = {{.+}} nounwind -- 2.40.0