CodeGenFunction CGF(CGM);
// Map "T omp_in;" variable to "*omp_in_parm" value in all expressions.
// Map "T omp_out;" variable to "*omp_out_parm" value in all expressions.
- CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args);
+ CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, In->getLocation(),
+ Out->getLocation());
CodeGenFunction::OMPPrivateScope Scope(CGF);
Address AddrIn = CGF.GetAddrOfLocalVar(&OmpInParm);
Scope.addPrivate(In, [&CGF, AddrIn, PtrTy]() -> Address {
// threadprivate copy of the variable VD
CodeGenFunction CtorCGF(CGM);
FunctionArgList Args;
- ImplicitParamDecl Dst(CGM.getContext(), CGM.getContext().VoidPtrTy,
+ ImplicitParamDecl Dst(CGM.getContext(), /*DC=*/nullptr, Loc,
+ /*Id=*/nullptr, CGM.getContext().VoidPtrTy,
ImplicitParamDecl::Other);
Args.push_back(&Dst);
auto Fn = CGM.CreateGlobalInitOrDestructFunction(
FTy, ".__kmpc_global_ctor_.", FI, Loc);
CtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidPtrTy, Fn, FI,
- Args, SourceLocation());
+ Args, Loc, Loc);
auto ArgVal = CtorCGF.EmitLoadOfScalar(
CtorCGF.GetAddrOfLocalVar(&Dst), /*Volatile=*/false,
CGM.getContext().VoidPtrTy, Dst.getLocation());
Address Arg = Address(ArgVal, VDAddr.getAlignment());
- Arg = CtorCGF.Builder.CreateElementBitCast(Arg,
- CtorCGF.ConvertTypeForMem(ASTTy));
+ Arg = CtorCGF.Builder.CreateElementBitCast(
+ Arg, CtorCGF.ConvertTypeForMem(ASTTy));
CtorCGF.EmitAnyExprToMem(Init, Arg, Init->getType().getQualifiers(),
/*IsInitializer=*/true);
ArgVal = CtorCGF.EmitLoadOfScalar(
// of the variable VD
CodeGenFunction DtorCGF(CGM);
FunctionArgList Args;
- ImplicitParamDecl Dst(CGM.getContext(), CGM.getContext().VoidPtrTy,
+ ImplicitParamDecl Dst(CGM.getContext(), /*DC=*/nullptr, Loc,
+ /*Id=*/nullptr, CGM.getContext().VoidPtrTy,
ImplicitParamDecl::Other);
Args.push_back(&Dst);
FTy, ".__kmpc_global_dtor_.", FI, Loc);
auto NL = ApplyDebugLocation::CreateEmpty(DtorCGF);
DtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI, Args,
- SourceLocation());
+ Loc, Loc);
// Create a scope with an artificial location for the body of this function.
auto AL = ApplyDebugLocation::CreateArtificial(DtorCGF);
auto ArgVal = DtorCGF.EmitLoadOfScalar(
FunctionArgList ArgList;
InitCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, InitFunction,
CGM.getTypes().arrangeNullaryFunction(), ArgList,
- Loc);
+ Loc, Loc);
emitThreadPrivateVarInit(InitCGF, VDAddr, Ctor, CopyCtor, Dtor, Loc);
InitCGF.FinishFunction();
return InitFunction;
static llvm::Value *emitCopyprivateCopyFunction(
CodeGenModule &CGM, llvm::Type *ArgsType,
ArrayRef<const Expr *> CopyprivateVars, ArrayRef<const Expr *> DestExprs,
- ArrayRef<const Expr *> SrcExprs, ArrayRef<const Expr *> AssignmentOps) {
+ ArrayRef<const Expr *> SrcExprs, ArrayRef<const Expr *> AssignmentOps,
+ SourceLocation Loc) {
auto &C = CGM.getContext();
// void copy_func(void *LHSArg, void *RHSArg);
FunctionArgList Args;
- ImplicitParamDecl LHSArg(C, C.VoidPtrTy, ImplicitParamDecl::Other);
- ImplicitParamDecl RHSArg(C, C.VoidPtrTy, ImplicitParamDecl::Other);
+ ImplicitParamDecl LHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
+ ImplicitParamDecl::Other);
+ ImplicitParamDecl RHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
+ ImplicitParamDecl::Other);
Args.push_back(&LHSArg);
Args.push_back(&RHSArg);
auto &CGFI = CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
".omp.copyprivate.copy_func", &CGM.getModule());
CGM.SetInternalFunctionAttributes(/*D=*/nullptr, Fn, CGFI);
CodeGenFunction CGF(CGM);
- CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, CGFI, Args);
+ CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, CGFI, Args, Loc, Loc);
// Dest = (void*[n])(LHSArg);
// Src = (void*[n])(RHSArg);
Address LHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
// threads in the corresponding parallel region.
auto *CpyFn = emitCopyprivateCopyFunction(
CGM, CGF.ConvertTypeForMem(CopyprivateArrayTy)->getPointerTo(),
- CopyprivateVars, SrcExprs, DstExprs, AssignmentOps);
+ CopyprivateVars, SrcExprs, DstExprs, AssignmentOps, Loc);
auto *BufSize = CGF.getTypeSize(CopyprivateArrayTy);
Address CL =
CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(CopyprivateList,
Args.push_back(&DummyPtr);
CodeGenFunction CGF(CGM);
+ // Disable debug info for global (de-)initializers because they are not part
+ // of any particular construct.
+ CGF.disableDebugInfo();
auto &FI = CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
auto FTy = CGM.getTypes().GetFunctionType(FI);
- auto *Fn =
- CGM.CreateGlobalInitOrDestructFunction(FTy, Name, FI, SourceLocation());
- CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FI, Args, SourceLocation());
+ auto *Fn = CGM.CreateGlobalInitOrDestructFunction(FTy, Name, FI);
+ CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FI, Args);
Codegen(CGF);
CGF.FinishFunction();
return Fn;
llvm::Function *
CGOpenMPRuntime::createOffloadingBinaryDescriptorRegistration() {
-
// If we don't have entries or if we are emitting code for the device, we
// don't need to do anything.
if (CGM.getLangOpts().OpenMPIsDevice || OffloadEntriesInfoManager.empty())
".omp_task_entry.", &CGM.getModule());
CGM.SetInternalFunctionAttributes(/*D=*/nullptr, TaskEntry, TaskEntryFnInfo);
CodeGenFunction CGF(CGM);
- CGF.StartFunction(GlobalDecl(), KmpInt32Ty, TaskEntry, TaskEntryFnInfo, Args);
+ CGF.StartFunction(GlobalDecl(), KmpInt32Ty, TaskEntry, TaskEntryFnInfo, Args,
+ Loc, Loc);
// TaskFunction(gtid, tt->task_data.part_id, &tt->privates, task_privates_map,
// tt,
CGM.SetInternalFunctionAttributes(/*D=*/nullptr, DestructorFn,
DestructorFnInfo);
CodeGenFunction CGF(CGM);
- CGF.disableDebugInfo();
CGF.StartFunction(GlobalDecl(), KmpInt32Ty, DestructorFn, DestructorFnInfo,
- Args);
+ Args, Loc, Loc);
LValue Base = CGF.EmitLoadOfPointerLValue(
CGF.GetAddrOfLocalVar(&TaskTypeArg),
TaskPrivatesMap->removeFnAttr(llvm::Attribute::OptimizeNone);
TaskPrivatesMap->addFnAttr(llvm::Attribute::AlwaysInline);
CodeGenFunction CGF(CGM);
- CGF.disableDebugInfo();
CGF.StartFunction(GlobalDecl(), C.VoidTy, TaskPrivatesMap,
- TaskPrivatesMapFnInfo, Args);
+ TaskPrivatesMapFnInfo, Args, Loc, Loc);
// *privi = &.privates.privi;
LValue Base = CGF.EmitLoadOfPointerLValue(
".omp_task_dup.", &CGM.getModule());
CGM.SetInternalFunctionAttributes(/*D=*/nullptr, TaskDup, TaskDupFnInfo);
CodeGenFunction CGF(CGM);
- CGF.disableDebugInfo();
- CGF.StartFunction(GlobalDecl(), C.VoidTy, TaskDup, TaskDupFnInfo, Args);
+ CGF.StartFunction(GlobalDecl(), C.VoidTy, TaskDup, TaskDupFnInfo, Args, Loc,
+ Loc);
LValue TDBase = CGF.EmitLoadOfPointerLValue(
CGF.GetAddrOfLocalVar(&DstArg),
}
llvm::Value *CGOpenMPRuntime::emitReductionFunction(
- CodeGenModule &CGM, llvm::Type *ArgsType, ArrayRef<const Expr *> Privates,
- ArrayRef<const Expr *> LHSExprs, ArrayRef<const Expr *> RHSExprs,
- ArrayRef<const Expr *> ReductionOps) {
+ CodeGenModule &CGM, SourceLocation Loc, llvm::Type *ArgsType,
+ ArrayRef<const Expr *> Privates, ArrayRef<const Expr *> LHSExprs,
+ ArrayRef<const Expr *> RHSExprs, ArrayRef<const Expr *> ReductionOps) {
auto &C = CGM.getContext();
// void reduction_func(void *LHSArg, void *RHSArg);
FunctionArgList Args;
- ImplicitParamDecl LHSArg(C, C.VoidPtrTy, ImplicitParamDecl::Other);
- ImplicitParamDecl RHSArg(C, C.VoidPtrTy, ImplicitParamDecl::Other);
+ ImplicitParamDecl LHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
+ ImplicitParamDecl::Other);
+ ImplicitParamDecl RHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
+ ImplicitParamDecl::Other);
Args.push_back(&LHSArg);
Args.push_back(&RHSArg);
auto &CGFI = CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
".omp.reduction.reduction_func", &CGM.getModule());
CGM.SetInternalFunctionAttributes(/*D=*/nullptr, Fn, CGFI);
CodeGenFunction CGF(CGM);
- CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, CGFI, Args);
+ CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, CGFI, Args, Loc, Loc);
// Dst = (void*[n])(LHSArg);
// Src = (void*[n])(RHSArg);
// 2. Emit reduce_func().
auto *ReductionFn = emitReductionFunction(
- CGM, CGF.ConvertTypeForMem(ReductionArrayTy)->getPointerTo(), Privates,
- LHSExprs, RHSExprs, ReductionOps);
+ CGM, Loc, CGF.ConvertTypeForMem(ReductionArrayTy)->getPointerTo(),
+ Privates, LHSExprs, RHSExprs, ReductionOps);
// 3. Create static kmp_critical_name lock = { 0 };
auto *Lock = getCriticalRegionLock(".reduction");
ReductionCodeGen &RCG, unsigned N) {
auto &C = CGM.getContext();
FunctionArgList Args;
- ImplicitParamDecl Param(C, C.VoidPtrTy, ImplicitParamDecl::Other);
+ ImplicitParamDecl Param(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
+ ImplicitParamDecl::Other);
Args.emplace_back(&Param);
auto &FnInfo =
CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
".red_init.", &CGM.getModule());
CGM.SetInternalFunctionAttributes(/*D=*/nullptr, Fn, FnInfo);
CodeGenFunction CGF(CGM);
- CGF.disableDebugInfo();
- CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args);
+ CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);
Address PrivateAddr = CGF.EmitLoadOfPointer(
CGF.GetAddrOfLocalVar(&Param),
C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
auto *LHSVD = cast<VarDecl>(cast<DeclRefExpr>(LHS)->getDecl());
auto *RHSVD = cast<VarDecl>(cast<DeclRefExpr>(RHS)->getDecl());
FunctionArgList Args;
- ImplicitParamDecl ParamInOut(C, C.VoidPtrTy, ImplicitParamDecl::Other);
- ImplicitParamDecl ParamIn(C, C.VoidPtrTy, ImplicitParamDecl::Other);
+ ImplicitParamDecl ParamInOut(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
+ C.VoidPtrTy, ImplicitParamDecl::Other);
+ ImplicitParamDecl ParamIn(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
+ ImplicitParamDecl::Other);
Args.emplace_back(&ParamInOut);
Args.emplace_back(&ParamIn);
auto &FnInfo =
".red_comb.", &CGM.getModule());
CGM.SetInternalFunctionAttributes(/*D=*/nullptr, Fn, FnInfo);
CodeGenFunction CGF(CGM);
- CGF.disableDebugInfo();
- CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args);
+ CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);
llvm::Value *Size = nullptr;
// If the size of the reduction item is non-constant, load it from global
// threadprivate variable.
return nullptr;
auto &C = CGM.getContext();
FunctionArgList Args;
- ImplicitParamDecl Param(C, C.VoidPtrTy, ImplicitParamDecl::Other);
+ ImplicitParamDecl Param(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
+ ImplicitParamDecl::Other);
Args.emplace_back(&Param);
auto &FnInfo =
CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
".red_fini.", &CGM.getModule());
CGM.SetInternalFunctionAttributes(/*D=*/nullptr, Fn, FnInfo);
CodeGenFunction CGF(CGM);
- CGF.disableDebugInfo();
- CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args);
+ CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);
Address PrivateAddr = CGF.EmitLoadOfPointer(
CGF.GetAddrOfLocalVar(&Param),
C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
}
CGOpenMPRuntimeNVPTX::WorkerFunctionState::WorkerFunctionState(
- CodeGenModule &CGM)
- : WorkerFn(nullptr), CGFI(nullptr) {
+ CodeGenModule &CGM, SourceLocation Loc)
+ : WorkerFn(nullptr), CGFI(nullptr), Loc(Loc) {
createWorkerFunction(CGM);
}
ExecutionModeRAII ModeRAII(CurrentExecutionMode,
CGOpenMPRuntimeNVPTX::ExecutionMode::Generic);
EntryFunctionState EST;
- WorkerFunctionState WST(CGM);
+ WorkerFunctionState WST(CGM, D.getLocStart());
Work.clear();
WrapperFunctionsMap.clear();
// Emit target region as a standalone region.
class NVPTXPrePostActionTy : public PrePostActionTy {
- CGOpenMPRuntimeNVPTX &RT;
CGOpenMPRuntimeNVPTX::EntryFunctionState &EST;
CGOpenMPRuntimeNVPTX::WorkerFunctionState &WST;
public:
- NVPTXPrePostActionTy(CGOpenMPRuntimeNVPTX &RT,
- CGOpenMPRuntimeNVPTX::EntryFunctionState &EST,
+ NVPTXPrePostActionTy(CGOpenMPRuntimeNVPTX::EntryFunctionState &EST,
CGOpenMPRuntimeNVPTX::WorkerFunctionState &WST)
- : RT(RT), EST(EST), WST(WST) {}
+ : EST(EST), WST(WST) {}
void Enter(CodeGenFunction &CGF) override {
- RT.emitGenericEntryHeader(CGF, EST, WST);
+ static_cast<CGOpenMPRuntimeNVPTX &>(CGF.CGM.getOpenMPRuntime())
+ .emitGenericEntryHeader(CGF, EST, WST);
}
void Exit(CodeGenFunction &CGF) override {
- RT.emitGenericEntryFooter(CGF, EST);
+ static_cast<CGOpenMPRuntimeNVPTX &>(CGF.CGM.getOpenMPRuntime())
+ .emitGenericEntryFooter(CGF, EST);
}
- } Action(*this, EST, WST);
+ } Action(EST, WST);
CodeGen.setAction(Action);
emitTargetOutlinedFunctionHelper(D, ParentName, OutlinedFn, OutlinedFnID,
IsOffloadEntry, CodeGen);
Bld.CreateCondBr(IsWorker, WorkerBB, MasterCheckBB);
CGF.EmitBlock(WorkerBB);
- emitCall(CGF, WST.WorkerFn);
+ emitOutlinedFunctionCall(CGF, WST.Loc, WST.WorkerFn);
CGF.EmitBranch(EST.ExitBB);
CGF.EmitBlock(MasterCheckBB);
ASTContext &Ctx = CGM.getContext();
CodeGenFunction CGF(CGM, /*suppressNewContext=*/true);
- CGF.disableDebugInfo();
- CGF.StartFunction(GlobalDecl(), Ctx.VoidTy, WST.WorkerFn, *WST.CGFI, {});
+ CGF.StartFunction(GlobalDecl(), Ctx.VoidTy, WST.WorkerFn, *WST.CGFI, {},
+ WST.Loc, WST.Loc);
emitWorkerLoop(CGF, WST);
CGF.FinishFunction();
}
Address Capture = CGF.EmitLoadOfPointer(SharedArgs,
Ctx.getPointerType(
Ctx.getPointerType(Ctx.VoidPtrTy)).castAs<PointerType>());
- emitCall(CGF, W, {Bld.getInt16(/*ParallelLevel=*/0),
- getMasterThreadID(CGF), Capture.getPointer()});
+ emitOutlinedFunctionCall(CGF, WST.Loc, W,
+ {Bld.getInt16(/*ParallelLevel=*/0),
+ getMasterThreadID(CGF), Capture.getPointer()});
// Go to end of parallel region.
CGF.EmitBranch(TerminateBB);
/// local = local @ remote
/// else
/// local = remote
-static llvm::Value *
-emitReduceScratchpadFunction(CodeGenModule &CGM,
- ArrayRef<const Expr *> Privates,
- QualType ReductionArrayTy, llvm::Value *ReduceFn) {
+static llvm::Value *emitReduceScratchpadFunction(
+ CodeGenModule &CGM, ArrayRef<const Expr *> Privates,
+ QualType ReductionArrayTy, llvm::Value *ReduceFn, SourceLocation Loc) {
auto &C = CGM.getContext();
auto Int32Ty = C.getIntTypeForBitwidth(32, /* Signed */ true);
// Destination of the copy.
- ImplicitParamDecl ReduceListArg(C, C.VoidPtrTy, ImplicitParamDecl::Other);
+ ImplicitParamDecl ReduceListArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
+ C.VoidPtrTy, ImplicitParamDecl::Other);
// Base address of the scratchpad array, with each element storing a
// Reduce list per team.
- ImplicitParamDecl ScratchPadArg(C, C.VoidPtrTy, ImplicitParamDecl::Other);
+ ImplicitParamDecl ScratchPadArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
+ C.VoidPtrTy, ImplicitParamDecl::Other);
// A source index into the scratchpad array.
- ImplicitParamDecl IndexArg(C, Int32Ty, ImplicitParamDecl::Other);
+ ImplicitParamDecl IndexArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, Int32Ty,
+ ImplicitParamDecl::Other);
// Row width of an element in the scratchpad array, typically
// the number of teams.
- ImplicitParamDecl WidthArg(C, Int32Ty, ImplicitParamDecl::Other);
+ ImplicitParamDecl WidthArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, Int32Ty,
+ ImplicitParamDecl::Other);
// If should_reduce == 1, then it's load AND reduce,
// If should_reduce == 0 (or otherwise), then it only loads (+ copy).
// The latter case is used for initialization.
- ImplicitParamDecl ShouldReduceArg(C, Int32Ty, ImplicitParamDecl::Other);
+ ImplicitParamDecl ShouldReduceArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
+ Int32Ty, ImplicitParamDecl::Other);
FunctionArgList Args;
Args.push_back(&ReduceListArg);
"_omp_reduction_load_and_reduce", &CGM.getModule());
CGM.SetInternalFunctionAttributes(/*DC=*/nullptr, Fn, CGFI);
CodeGenFunction CGF(CGM);
- // We don't need debug information in this function as nothing here refers to
- // user code.
- CGF.disableDebugInfo();
- CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, CGFI, Args);
+ CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, CGFI, Args, Loc, Loc);
auto &Bld = CGF.Builder;
ReduceListAddr.getPointer(), CGF.VoidPtrTy);
llvm::Value *RemoteDataPtr = Bld.CreatePointerBitCastOrAddrSpaceCast(
RemoteReduceList.getPointer(), CGF.VoidPtrTy);
- CGF.EmitCallOrInvoke(ReduceFn, {LocalDataPtr, RemoteDataPtr});
+ CGM.getOpenMPRuntime().emitOutlinedFunctionCall(
+ CGF, Loc, ReduceFn, {LocalDataPtr, RemoteDataPtr});
Bld.CreateBr(MergeBB);
CGF.EmitBlock(ElseBB);
///
static llvm::Value *emitCopyToScratchpad(CodeGenModule &CGM,
ArrayRef<const Expr *> Privates,
- QualType ReductionArrayTy) {
+ QualType ReductionArrayTy,
+ SourceLocation Loc) {
auto &C = CGM.getContext();
auto Int32Ty = C.getIntTypeForBitwidth(32, /* Signed */ true);
// Source of the copy.
- ImplicitParamDecl ReduceListArg(C, C.VoidPtrTy, ImplicitParamDecl::Other);
+ ImplicitParamDecl ReduceListArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
+ C.VoidPtrTy, ImplicitParamDecl::Other);
// Base address of the scratchpad array, with each element storing a
// Reduce list per team.
- ImplicitParamDecl ScratchPadArg(C, C.VoidPtrTy, ImplicitParamDecl::Other);
+ ImplicitParamDecl ScratchPadArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
+ C.VoidPtrTy, ImplicitParamDecl::Other);
// A destination index into the scratchpad array, typically the team
// identifier.
- ImplicitParamDecl IndexArg(C, Int32Ty, ImplicitParamDecl::Other);
+ ImplicitParamDecl IndexArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, Int32Ty,
+ ImplicitParamDecl::Other);
// Row width of an element in the scratchpad array, typically
// the number of teams.
- ImplicitParamDecl WidthArg(C, Int32Ty, ImplicitParamDecl::Other);
+ ImplicitParamDecl WidthArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, Int32Ty,
+ ImplicitParamDecl::Other);
FunctionArgList Args;
Args.push_back(&ReduceListArg);
"_omp_reduction_copy_to_scratchpad", &CGM.getModule());
CGM.SetInternalFunctionAttributes(/*DC=*/nullptr, Fn, CGFI);
CodeGenFunction CGF(CGM);
- // We don't need debug information in this function as nothing here refers to
- // user code.
- CGF.disableDebugInfo();
- CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, CGFI, Args);
+ CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, CGFI, Args, Loc, Loc);
auto &Bld = CGF.Builder;
/// sync
static llvm::Value *emitInterWarpCopyFunction(CodeGenModule &CGM,
ArrayRef<const Expr *> Privates,
- QualType ReductionArrayTy) {
+ QualType ReductionArrayTy,
+ SourceLocation Loc) {
auto &C = CGM.getContext();
auto &M = CGM.getModule();
// ReduceList: thread local Reduce list.
// At the stage of the computation when this function is called, partially
// aggregated values reside in the first lane of every active warp.
- ImplicitParamDecl ReduceListArg(C, C.VoidPtrTy, ImplicitParamDecl::Other);
+ ImplicitParamDecl ReduceListArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
+ C.VoidPtrTy, ImplicitParamDecl::Other);
// NumWarps: number of warps active in the parallel region. This could
// be smaller than 32 (max warps in a CTA) for partial block reduction.
- ImplicitParamDecl NumWarpsArg(C,
+ ImplicitParamDecl NumWarpsArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
C.getIntTypeForBitwidth(32, /* Signed */ true),
ImplicitParamDecl::Other);
FunctionArgList Args;
"_omp_reduction_inter_warp_copy_func", &CGM.getModule());
CGM.SetInternalFunctionAttributes(/*DC=*/nullptr, Fn, CGFI);
CodeGenFunction CGF(CGM);
- // We don't need debug information in this function as nothing here refers to
- // user code.
- CGF.disableDebugInfo();
- CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, CGFI, Args);
+ CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, CGFI, Args, Loc, Loc);
auto &Bld = CGF.Builder;
/// (2k+1)th thread is ignored in the value aggregation. Therefore
/// we copy the Reduce list from the (2k+1)th lane to (k+1)th lane so
/// that the contiguity assumption still holds.
-static llvm::Value *
-emitShuffleAndReduceFunction(CodeGenModule &CGM,
- ArrayRef<const Expr *> Privates,
- QualType ReductionArrayTy, llvm::Value *ReduceFn) {
+static llvm::Value *emitShuffleAndReduceFunction(
+ CodeGenModule &CGM, ArrayRef<const Expr *> Privates,
+ QualType ReductionArrayTy, llvm::Value *ReduceFn, SourceLocation Loc) {
auto &C = CGM.getContext();
// Thread local Reduce list used to host the values of data to be reduced.
- ImplicitParamDecl ReduceListArg(C, C.VoidPtrTy, ImplicitParamDecl::Other);
+ ImplicitParamDecl ReduceListArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
+ C.VoidPtrTy, ImplicitParamDecl::Other);
// Current lane id; could be logical.
- ImplicitParamDecl LaneIDArg(C, C.ShortTy, ImplicitParamDecl::Other);
+ ImplicitParamDecl LaneIDArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.ShortTy,
+ ImplicitParamDecl::Other);
// Offset of the remote source lane relative to the current lane.
- ImplicitParamDecl RemoteLaneOffsetArg(C, C.ShortTy,
- ImplicitParamDecl::Other);
+ ImplicitParamDecl RemoteLaneOffsetArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
+ C.ShortTy, ImplicitParamDecl::Other);
// Algorithm version. This is expected to be known at compile time.
- ImplicitParamDecl AlgoVerArg(C, C.ShortTy, ImplicitParamDecl::Other);
+ ImplicitParamDecl AlgoVerArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
+ C.ShortTy, ImplicitParamDecl::Other);
FunctionArgList Args;
Args.push_back(&ReduceListArg);
Args.push_back(&LaneIDArg);
"_omp_reduction_shuffle_and_reduce_func", &CGM.getModule());
CGM.SetInternalFunctionAttributes(/*D=*/nullptr, Fn, CGFI);
CodeGenFunction CGF(CGM);
- // We don't need debug information in this function as nothing here refers to
- // user code.
- CGF.disableDebugInfo();
- CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, CGFI, Args);
+ CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, CGFI, Args, Loc, Loc);
auto &Bld = CGF.Builder;
LocalReduceList.getPointer(), CGF.VoidPtrTy);
llvm::Value *RemoteReduceListPtr = Bld.CreatePointerBitCastOrAddrSpaceCast(
RemoteReduceList.getPointer(), CGF.VoidPtrTy);
- CGF.EmitCallOrInvoke(ReduceFn, {LocalReduceListPtr, RemoteReduceListPtr});
+ CGM.getOpenMPRuntime().emitOutlinedFunctionCall(
+ CGF, Loc, ReduceFn, {LocalReduceListPtr, RemoteReduceListPtr});
Bld.CreateBr(MergeBB);
CGF.EmitBlock(ElseBB);
// 2. Emit reduce_func().
auto *ReductionFn = emitReductionFunction(
- CGM, CGF.ConvertTypeForMem(ReductionArrayTy)->getPointerTo(), Privates,
- LHSExprs, RHSExprs, ReductionOps);
+ CGM, Loc, CGF.ConvertTypeForMem(ReductionArrayTy)->getPointerTo(),
+ Privates, LHSExprs, RHSExprs, ReductionOps);
// 4. Build res = __kmpc_reduce{_nowait}(<gtid>, <n>, sizeof(RedList),
// RedList, shuffle_reduce_func, interwarp_copy_func);
ReductionList.getPointer(), CGF.VoidPtrTy);
auto *ShuffleAndReduceFn = emitShuffleAndReduceFunction(
- CGM, Privates, ReductionArrayTy, ReductionFn);
+ CGM, Privates, ReductionArrayTy, ReductionFn, Loc);
auto *InterWarpCopyFn =
- emitInterWarpCopyFunction(CGM, Privates, ReductionArrayTy);
+ emitInterWarpCopyFunction(CGM, Privates, ReductionArrayTy, Loc);
llvm::Value *Res = nullptr;
if (ParallelReduction) {
if (TeamsReduction) {
auto *ScratchPadCopyFn =
- emitCopyToScratchpad(CGM, Privates, ReductionArrayTy);
+ emitCopyToScratchpad(CGM, Privates, ReductionArrayTy, Loc);
auto *LoadAndReduceFn = emitReduceScratchpadFunction(
- CGM, Privates, ReductionArrayTy, ReductionFn);
+ CGM, Privates, ReductionArrayTy, ReductionFn, Loc);
llvm::Value *Args[] = {ThreadId,
CGF.Builder.getInt32(RHSExprs.size()),
Ctx.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/false);
QualType Int32PtrQTy = Ctx.getPointerType(Int32QTy);
QualType VoidPtrPtrQTy = Ctx.getPointerType(Ctx.VoidPtrTy);
- ImplicitParamDecl ParallelLevelArg(Ctx, Int16QTy, ImplicitParamDecl::Other);
- ImplicitParamDecl WrapperArg(Ctx, Int32QTy, ImplicitParamDecl::Other);
- ImplicitParamDecl SharedArgsList(Ctx, VoidPtrPtrQTy,
- ImplicitParamDecl::Other);
+ ImplicitParamDecl ParallelLevelArg(Ctx, /*DC=*/nullptr, D.getLocStart(),
+ /*Id=*/nullptr, Int16QTy,
+ ImplicitParamDecl::Other);
+ ImplicitParamDecl WrapperArg(Ctx, /*DC=*/nullptr, D.getLocStart(),
+ /*Id=*/nullptr, Int32QTy,
+ ImplicitParamDecl::Other);
+ ImplicitParamDecl SharedArgsList(Ctx, /*DC=*/nullptr, D.getLocStart(),
+ /*Id=*/nullptr, VoidPtrPtrQTy,
+ ImplicitParamDecl::Other);
WrapperArgs.emplace_back(&ParallelLevelArg);
WrapperArgs.emplace_back(&WrapperArg);
WrapperArgs.emplace_back(&SharedArgsList);
Fn->setLinkage(llvm::GlobalValue::InternalLinkage);
CodeGenFunction CGF(CGM, /*suppressNewContext=*/true);
- CGF.StartFunction(GlobalDecl(), Ctx.VoidTy, Fn, CGFI, WrapperArgs);
+ CGF.StartFunction(GlobalDecl(), Ctx.VoidTy, Fn, CGFI, WrapperArgs,
+ D.getLocStart(), D.getLocStart());
const auto *RD = CS.getCapturedRecordDecl();
auto CurField = RD->field_begin();
Args.emplace_back(Arg);
}
- emitCall(CGF, OutlinedParallelFn, Args);
+ emitOutlinedFunctionCall(CGF, D.getLocStart(), OutlinedParallelFn, Args);
CGF.FinishFunction();
return Fn;
}