Address CGOpenMPRuntime::getAddressOfLocalVariable(CodeGenFunction &CGF,
const VarDecl *VD) {
+ if (!VD)
+ return Address::invalid();
const VarDecl *CVD = VD->getCanonicalDecl();
if (!CVD->hasAttr<OMPAllocateDeclAttr>())
return Address::invalid();
- for (const Attr *A: CVD->getAttrs()) {
- if (const auto *AA = dyn_cast<OMPAllocateDeclAttr>(A)) {
- auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
- if (!Elem.second.ServiceInsertPt)
- setLocThreadIdInsertPt(CGF);
- CGBuilderTy::InsertPointGuard IPG(CGF.Builder);
- CGF.Builder.SetInsertPoint(Elem.second.ServiceInsertPt);
- llvm::Value *Size;
- CharUnits Align = CGM.getContext().getDeclAlign(CVD);
- if (CVD->getType()->isVariablyModifiedType()) {
- Size = CGF.getTypeSize(CVD->getType());
- Align = CGM.getContext().getTypeAlignInChars(CVD->getType());
- } else {
- CharUnits Sz = CGM.getContext().getTypeSizeInChars(CVD->getType());
- Align = CGM.getContext().getDeclAlign(CVD);
- Size = CGM.getSize(Sz.alignTo(Align));
- }
- llvm::Value *ThreadID = getThreadID(CGF, CVD->getBeginLoc());
- llvm::Value *Allocator;
- if (const Expr *AllocExpr = AA->getAllocator()) {
- Allocator = CGF.EmitScalarExpr(AllocExpr);
- } else {
- // Default allocator in libomp is nullptr.
- Allocator = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy);
- }
- llvm::Value *Args[] = {ThreadID, Size, Allocator};
-
- llvm::Value *Addr =
- CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_alloc), Args,
- CVD->getName() + ".void.addr");
- llvm::Value *FiniArgs[OMPAllocateCleanupTy::CleanupArgs] = {
- ThreadID, Addr, Allocator};
- llvm::FunctionCallee FiniRTLFn = createRuntimeFunction(OMPRTL__kmpc_free);
-
- CGF.EHStack.pushCleanup<OMPAllocateCleanupTy>(
- NormalAndEHCleanup, FiniRTLFn, llvm::makeArrayRef(FiniArgs));
- Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
- Addr,
- CGF.ConvertTypeForMem(
- CGM.getContext().getPointerType(CVD->getType())),
- CVD->getName() + ".addr");
- return Address(Addr, Align);
- }
- }
- return Address::invalid();
+ const auto *AA = CVD->getAttr<OMPAllocateDeclAttr>();
+ // Use the default allocation.
+ if (AA->getAllocatorType() == OMPAllocateDeclAttr::OMPDefaultMemAlloc)
+ return Address::invalid();
+ auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
+ if (!Elem.second.ServiceInsertPt)
+ setLocThreadIdInsertPt(CGF);
+ CGBuilderTy::InsertPointGuard IPG(CGF.Builder);
+ CGF.Builder.SetInsertPoint(Elem.second.ServiceInsertPt);
+ llvm::Value *Size;
+ CharUnits Align = CGM.getContext().getDeclAlign(CVD);
+ if (CVD->getType()->isVariablyModifiedType()) {
+ Size = CGF.getTypeSize(CVD->getType());
+ Align = CGM.getContext().getTypeAlignInChars(CVD->getType());
+ } else {
+ CharUnits Sz = CGM.getContext().getTypeSizeInChars(CVD->getType());
+ Align = CGM.getContext().getDeclAlign(CVD);
+ Size = CGM.getSize(Sz.alignTo(Align));
+ }
+ llvm::Value *ThreadID = getThreadID(CGF, CVD->getBeginLoc());
+ assert(AA->getAllocator() &&
+ "Expected allocator expression for non-default allocator.");
+ llvm::Value *Allocator = CGF.EmitScalarExpr(AA->getAllocator());
+ llvm::Value *Args[] = {ThreadID, Size, Allocator};
+
+ llvm::Value *Addr =
+ CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_alloc), Args,
+ CVD->getName() + ".void.addr");
+ llvm::Value *FiniArgs[OMPAllocateCleanupTy::CleanupArgs] = {ThreadID, Addr,
+ Allocator};
+ llvm::FunctionCallee FiniRTLFn = createRuntimeFunction(OMPRTL__kmpc_free);
+
+ CGF.EHStack.pushCleanup<OMPAllocateCleanupTy>(NormalAndEHCleanup, FiniRTLFn,
+ llvm::makeArrayRef(FiniArgs));
+ Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
+ Addr,
+ CGF.ConvertTypeForMem(CGM.getContext().getPointerType(CVD->getType())),
+ CVD->getName() + ".addr");
+ return Address(Addr, Align);
}
llvm::Function *CGOpenMPSIMDRuntime::emitParallelOutlinedFunction(
Address CGOpenMPRuntimeNVPTX::getAddressOfLocalVariable(CodeGenFunction &CGF,
const VarDecl *VD) {
+ bool UseDefaultAllocator = true;
+ if (VD && VD->hasAttr<OMPAllocateDeclAttr>()) {
+ const auto *A = VD->getAttr<OMPAllocateDeclAttr>();
+ switch (A->getAllocatorType()) {
+ // Use the default allocator here as by default local vars are
+ // threadlocal.
+ case OMPAllocateDeclAttr::OMPDefaultMemAlloc:
+ case OMPAllocateDeclAttr::OMPThreadMemAlloc:
+ // Just pass-through to check if the globalization is required.
+ break;
+ case OMPAllocateDeclAttr::OMPLargeCapMemAlloc:
+ case OMPAllocateDeclAttr::OMPCGroupMemAlloc:
+ case OMPAllocateDeclAttr::OMPHighBWMemAlloc:
+ case OMPAllocateDeclAttr::OMPLowLatMemAlloc:
+ case OMPAllocateDeclAttr::OMPConstMemAlloc:
+ case OMPAllocateDeclAttr::OMPPTeamMemAlloc:
+ case OMPAllocateDeclAttr::OMPUserDefinedMemAlloc:
+ UseDefaultAllocator = false;
+ break;
+ }
+ }
+
if (getDataSharingMode(CGM) != CGOpenMPRuntimeNVPTX::Generic)
return Address::invalid();
return VDI->second.PrivateAddr;
}
}
+
+ // TODO: replace it with return
+ // UseDefaultAllocator ? Address::invalid() :
+ // CGOpenMPRuntime::getAddressOfLocalVariable(CGF, VD); when NVPTX libomp
+ // supports __kmpc_alloc|__kmpc_free.
return Address::invalid();
static int a;
#pragma omp allocate(a) allocator(omp_thread_mem_alloc)
a=2;
- // CHECK: [[GTID:%.+]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @{{.+}})
- // CHECK-NEXT: [[B_VOID_ADDR:%.+]] = call i8* @__kmpc_alloc(i32 [[GTID]], i64 8, i8** null)
- // CHECK-NEXT: [[B_ADDR:%.+]] = bitcast i8* [[B_VOID_ADDR]] to double*
// CHECK-NOT: {{__kmpc_alloc|__kmpc_free}}
- // CHECK: store double 3.000000e+00, double* [[B_ADDR]],
- // CHECK: [[RES:%.+]] = call i32 [[FOO:@.+]]()
- // CHECK: store i32 [[RES]], i32* [[RET:%.+]],
- // CHECK-NEXT: call void @__kmpc_free(i32 [[GTID]], i8* [[B_VOID_ADDR]], i8** null)
+ // CHECK: alloca double,
// CHECK-NOT: {{__kmpc_alloc|__kmpc_free}}
double b = 3;
#pragma omp allocate(b)
- // CHECK: [[RETVAL:%.+]] = load i32, i32* [[RET]],
- // CHECK: ret i32 [[RETVAL]]
return (foo<int>());
}
-// CHECK-NOT: call {{.+}} {{__kmpc_alloc|__kmpc_free}}
-
-// CHECK: define {{.*}}i32 [[FOO]]()
+// CHECK: define {{.*}}i32 @{{.+}}foo{{.+}}()
// CHECK: [[GTID:%.+]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @{{.+}})
// CHECK-NEXT: [[OMP_CGROUP_MEM_ALLOC:%.+]] = load i8**, i8*** @omp_cgroup_mem_alloc,
// CHECK-NEXT: [[V_VOID_ADDR:%.+]] = call i8* @__kmpc_alloc(i32 [[GTID]], i64 4, i8** [[OMP_CGROUP_MEM_ALLOC]])