From: Alexey Bataev Date: Mon, 19 Mar 2018 17:04:07 +0000 (+0000) Subject: [OPENMP, NVPTX] Emit correct thread id. X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=15183f0cd211f02406991ed420e9c8711d1c508e;p=clang [OPENMP, NVPTX] Emit correct thread id. We emitted fake thread id for the outined function in NVPTX codegen. Patch adds emission of the real thread id. git-svn-id: https://llvm.org/svn/llvm-project/cfe/trunk@327867 91177308-0d34-0410-b5e6-96231b3b80d8 --- diff --git a/lib/CodeGen/CGOpenMPRuntime.h b/lib/CodeGen/CGOpenMPRuntime.h index 410241b618..f8a0772ebb 100644 --- a/lib/CodeGen/CGOpenMPRuntime.h +++ b/lib/CodeGen/CGOpenMPRuntime.h @@ -268,6 +268,10 @@ protected: void emitCall(CodeGenFunction &CGF, SourceLocation Loc, llvm::Value *Callee, ArrayRef Args = llvm::None) const; + /// \brief Emits address of the word in a memory where current thread id is + /// stored. + virtual Address emitThreadIDAddress(CodeGenFunction &CGF, SourceLocation Loc); + private: /// \brief Default const ident_t object used for initialization of all other /// ident_t objects. @@ -564,10 +568,6 @@ private: /// \return Cache variable for the specified threadprivate. llvm::Constant *getOrCreateThreadPrivateCache(const VarDecl *VD); - /// \brief Emits address of the word in a memory where current thread id is - /// stored. - virtual Address emitThreadIDAddress(CodeGenFunction &CGF, SourceLocation Loc); - /// \brief Gets (if variable with the given name already exist) or creates /// internal global variable with the specified Name. The created variable has /// linkage CommonLinkage by default and is initialized by null value. diff --git a/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp b/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp index 0eb01e7afe..c2b846e83a 100644 --- a/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp +++ b/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp @@ -608,7 +608,7 @@ void CGOpenMPRuntimeNVPTX::emitGenericEntryHeader(CodeGenFunction &CGF, Bld.CreateCondBr(IsWorker, WorkerBB, MasterCheckBB); CGF.EmitBlock(WorkerBB); - emitOutlinedFunctionCall(CGF, WST.Loc, WST.WorkerFn); + emitCall(CGF, WST.Loc, WST.WorkerFn); CGF.EmitBranch(EST.ExitBB); CGF.EmitBlock(MasterCheckBB); @@ -831,10 +831,9 @@ void CGOpenMPRuntimeNVPTX::emitWorkerLoop(CodeGenFunction &CGF, // Insert call to work function via shared wrapper. The shared // wrapper takes two arguments: // - the parallelism level; - // - the master thread ID; - emitOutlinedFunctionCall(CGF, WST.Loc, W, - {Bld.getInt16(/*ParallelLevel=*/0), - getMasterThreadID(CGF)}); + // - the thread ID; + emitCall(CGF, WST.Loc, W, + {Bld.getInt16(/*ParallelLevel=*/0), getThreadID(CGF, WST.Loc)}); // Go to end of parallel region. CGF.EmitBranch(TerminateBB); @@ -1316,12 +1315,12 @@ void CGOpenMPRuntimeNVPTX::emitTeamsCall(CodeGenFunction &CGF, if (!CGF.HaveInsertPoint()) return; - Address ZeroAddr = - CGF.CreateTempAlloca(CGF.Int32Ty, CharUnits::fromQuantity(4), - /*Name*/ ".zero.addr"); + Address ZeroAddr = CGF.CreateMemTemp( + CGF.getContext().getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1), + /*Name*/ ".zero.addr"); CGF.InitTempAlloca(ZeroAddr, CGF.Builder.getInt32(/*C*/ 0)); llvm::SmallVector OutlinedFnArgs; - OutlinedFnArgs.push_back(ZeroAddr.getPointer()); + OutlinedFnArgs.push_back(emitThreadIDAddress(CGF, Loc).getPointer()); OutlinedFnArgs.push_back(ZeroAddr.getPointer()); OutlinedFnArgs.append(CapturedVars.begin(), CapturedVars.end()); emitOutlinedFunctionCall(CGF, Loc, OutlinedFn, OutlinedFnArgs); @@ -1350,7 +1349,7 @@ void CGOpenMPRuntimeNVPTX::emitGenericParallelCall( // Force inline this outlined function at its call site. Fn->setLinkage(llvm::GlobalValue::InternalLinkage); - auto &&L0ParallelGen = [this, WFn, &CapturedVars](CodeGenFunction &CGF, + auto &&L0ParallelGen = [this, WFn, CapturedVars](CodeGenFunction &CGF, PrePostActionTy &) { CGBuilderTy &Bld = CGF.Builder; @@ -1420,17 +1419,20 @@ void CGOpenMPRuntimeNVPTX::emitGenericParallelCall( auto *ThreadID = getThreadID(CGF, Loc); llvm::Value *Args[] = {RTLoc, ThreadID}; - auto &&SeqGen = [this, Fn, &CapturedVars, &Args, Loc](CodeGenFunction &CGF, - PrePostActionTy &) { - auto &&CodeGen = [this, Fn, &CapturedVars, Loc](CodeGenFunction &CGF, - PrePostActionTy &Action) { + auto &&SeqGen = [this, Fn, CapturedVars, Args, Loc](CodeGenFunction &CGF, + PrePostActionTy &) { + auto &&CodeGen = [this, Fn, CapturedVars, Loc](CodeGenFunction &CGF, + PrePostActionTy &Action) { Action.Enter(CGF); llvm::SmallVector OutlinedFnArgs; - OutlinedFnArgs.push_back( - llvm::ConstantPointerNull::get(CGM.Int32Ty->getPointerTo())); - OutlinedFnArgs.push_back( - llvm::ConstantPointerNull::get(CGM.Int32Ty->getPointerTo())); + Address ZeroAddr = + CGF.CreateMemTemp(CGF.getContext().getIntTypeForBitwidth( + /*DestWidth=*/32, /*Signed=*/1), + ".zero.addr"); + CGF.InitTempAlloca(ZeroAddr, CGF.Builder.getInt32(/*C*/ 0)); + OutlinedFnArgs.push_back(emitThreadIDAddress(CGF, Loc).getPointer()); + OutlinedFnArgs.push_back(ZeroAddr.getPointer()); OutlinedFnArgs.append(CapturedVars.begin(), CapturedVars.end()); emitOutlinedFunctionCall(CGF, Loc, Fn, OutlinedFnArgs); }; @@ -1468,7 +1470,7 @@ void CGOpenMPRuntimeNVPTX::emitSpmdParallelCall( CGF.getContext().getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1), ".zero.addr"); CGF.InitTempAlloca(ZeroAddr, CGF.Builder.getInt32(/*C*/ 0)); - OutlinedFnArgs.push_back(ZeroAddr.getPointer()); + OutlinedFnArgs.push_back(emitThreadIDAddress(CGF, Loc).getPointer()); OutlinedFnArgs.push_back(ZeroAddr.getPointer()); OutlinedFnArgs.append(CapturedVars.begin(), CapturedVars.end()); emitOutlinedFunctionCall(CGF, Loc, OutlinedFn, OutlinedFnArgs); @@ -2873,14 +2875,15 @@ llvm::Function *CGOpenMPRuntimeNVPTX::createParallelDataSharingWrapper( const auto *RD = CS.getCapturedRecordDecl(); auto CurField = RD->field_begin(); + Address ZeroAddr = CGF.CreateMemTemp( + CGF.getContext().getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1), + /*Name*/ ".zero.addr"); + CGF.InitTempAlloca(ZeroAddr, CGF.Builder.getInt32(/*C*/ 0)); // Get the array of arguments. SmallVector Args; - // TODO: suppport SIMD and pass actual values - Args.emplace_back( - llvm::ConstantPointerNull::get(CGM.Int32Ty->getPointerTo())); - Args.emplace_back( - llvm::ConstantPointerNull::get(CGM.Int32Ty->getPointerTo())); + Args.emplace_back(CGF.GetAddrOfLocalVar(&WrapperArg).getPointer()); + Args.emplace_back(ZeroAddr.getPointer()); CGBuilderTy &Bld = CGF.Builder; auto CI = CS.capture_begin(); diff --git a/test/OpenMP/nvptx_teams_codegen.cpp b/test/OpenMP/nvptx_teams_codegen.cpp index c530d19fd7..017b44755c 100644 --- a/test/OpenMP/nvptx_teams_codegen.cpp +++ b/test/OpenMP/nvptx_teams_codegen.cpp @@ -105,7 +105,7 @@ int main (int argc, char **argv) { // CK2: [[AADDR:%.+]] = alloca i{{[0-9]+}}, // CK2: [[BADDR:%.+]] = alloca i{{[0-9]+}}, // CK2: [[ARGCADDR:%.+]] = alloca i{{[0-9]+}}, -// CK2-NOT: {{%.+}} = call i32 @__kmpc_global_thread_num( +// CK2: {{%.+}} = call i32 @__kmpc_global_thread_num( // CK2: store i{{[0-9]+}} [[A_IN]], i{{[0-9]+}}* [[AADDR]], // CK2: store i{{[0-9]+}} [[B_IN]], i{{[0-9]+}}* [[BADDR]], // CK2: store i{{[0-9]+}} [[ARGC_IN]], i{{[0-9]+}}* [[ARGCADDR]], @@ -129,7 +129,7 @@ int main (int argc, char **argv) { // CK2: [[AADDR:%.+]] = alloca i{{[0-9]+}}, // CK2: [[BADDR:%.+]] = alloca i{{[0-9]+}}, // CK2: [[ARGCADDR:%.+]] = alloca i{{[0-9]+}}**, -// CK2-NOT: {{%.+}} = call i32 @__kmpc_global_thread_num( +// CK2: {{%.+}} = call i32 @__kmpc_global_thread_num( // CK2: store i{{[0-9]+}} [[A_IN]], i{{[0-9]+}}* [[AADDR]], // CK2: store i{{[0-9]+}} [[B_IN]], i{{[0-9]+}}* [[BADDR]], // CK2: store i{{[0-9]+}}** [[ARGC]], i{{[0-9]+}}*** [[ARGCADDR]],