granicus.if.org Git - clang/commitdiff
[OPENMP] Simplify codegen for allocate directive on local variables.
author Alexey Bataev <a.bataev@hotmail.com>
Thu, 21 Mar 2019 20:36:16 +0000 (20:36 +0000)
committer Alexey Bataev <a.bataev@hotmail.com>
Thu, 21 Mar 2019 20:36:16 +0000 (20:36 +0000)
Simplified codegen for the allocate directive on local variables and
added an initial implementation of the codegen for the NVPTX target.

git-svn-id: https://llvm.org/svn/llvm-project/cfe/trunk@356710 91177308-0d34-0410-b5e6-96231b3b80d8

lib/CodeGen/CGOpenMPRuntime.cpp
lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp
test/OpenMP/allocate_codegen.cpp
test/OpenMP/nvptx_allocate_codegen.cpp

index d3736b7244d8f75d1737730e3b57b0c553d0f69b..a8af23b63acaaf1197b2b305503902f4f9ec8b0e 100644 (file)
@@ -9745,54 +9745,50 @@ public:
 
 Address CGOpenMPRuntime::getAddressOfLocalVariable(CodeGenFunction &CGF,
                                                    const VarDecl *VD) {
+  if (!VD)
+    return Address::invalid();
   const VarDecl *CVD = VD->getCanonicalDecl();
   if (!CVD->hasAttr<OMPAllocateDeclAttr>())
     return Address::invalid();
-  for (const Attr *A: CVD->getAttrs()) {
-    if (const auto *AA = dyn_cast<OMPAllocateDeclAttr>(A)) {
-      auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
-      if (!Elem.second.ServiceInsertPt)
-        setLocThreadIdInsertPt(CGF);
-      CGBuilderTy::InsertPointGuard IPG(CGF.Builder);
-      CGF.Builder.SetInsertPoint(Elem.second.ServiceInsertPt);
-      llvm::Value *Size;
-      CharUnits Align = CGM.getContext().getDeclAlign(CVD);
-      if (CVD->getType()->isVariablyModifiedType()) {
-        Size = CGF.getTypeSize(CVD->getType());
-        Align = CGM.getContext().getTypeAlignInChars(CVD->getType());
-      } else {
-        CharUnits Sz = CGM.getContext().getTypeSizeInChars(CVD->getType());
-        Align = CGM.getContext().getDeclAlign(CVD);
-        Size = CGM.getSize(Sz.alignTo(Align));
-      }
-      llvm::Value *ThreadID = getThreadID(CGF, CVD->getBeginLoc());
-      llvm::Value *Allocator;
-      if (const Expr *AllocExpr = AA->getAllocator()) {
-        Allocator = CGF.EmitScalarExpr(AllocExpr);
-      } else {
-        // Default allocator in libomp is nullptr.
-        Allocator = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy);
-      }
-      llvm::Value *Args[] = {ThreadID, Size, Allocator};
-
-      llvm::Value *Addr =
-          CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_alloc), Args,
-                              CVD->getName() + ".void.addr");
-      llvm::Value *FiniArgs[OMPAllocateCleanupTy::CleanupArgs] = {
-          ThreadID, Addr, Allocator};
-      llvm::FunctionCallee FiniRTLFn = createRuntimeFunction(OMPRTL__kmpc_free);
-
-      CGF.EHStack.pushCleanup<OMPAllocateCleanupTy>(
-          NormalAndEHCleanup, FiniRTLFn, llvm::makeArrayRef(FiniArgs));
-      Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
-          Addr,
-          CGF.ConvertTypeForMem(
-              CGM.getContext().getPointerType(CVD->getType())),
-          CVD->getName() + ".addr");
-      return Address(Addr, Align);
-    }
-  }
-  return Address::invalid();
+  const auto *AA = CVD->getAttr<OMPAllocateDeclAttr>();
+  // Use the default allocation.
+  if (AA->getAllocatorType() == OMPAllocateDeclAttr::OMPDefaultMemAlloc)
+    return Address::invalid();
+  auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
+  if (!Elem.second.ServiceInsertPt)
+    setLocThreadIdInsertPt(CGF);
+  CGBuilderTy::InsertPointGuard IPG(CGF.Builder);
+  CGF.Builder.SetInsertPoint(Elem.second.ServiceInsertPt);
+  llvm::Value *Size;
+  CharUnits Align = CGM.getContext().getDeclAlign(CVD);
+  if (CVD->getType()->isVariablyModifiedType()) {
+    Size = CGF.getTypeSize(CVD->getType());
+    Align = CGM.getContext().getTypeAlignInChars(CVD->getType());
+  } else {
+    CharUnits Sz = CGM.getContext().getTypeSizeInChars(CVD->getType());
+    Align = CGM.getContext().getDeclAlign(CVD);
+    Size = CGM.getSize(Sz.alignTo(Align));
+  }
+  llvm::Value *ThreadID = getThreadID(CGF, CVD->getBeginLoc());
+  assert(AA->getAllocator() &&
+         "Expected allocator expression for non-default allocator.");
+  llvm::Value *Allocator = CGF.EmitScalarExpr(AA->getAllocator());
+  llvm::Value *Args[] = {ThreadID, Size, Allocator};
+
+  llvm::Value *Addr =
+      CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_alloc), Args,
+                          CVD->getName() + ".void.addr");
+  llvm::Value *FiniArgs[OMPAllocateCleanupTy::CleanupArgs] = {ThreadID, Addr,
+                                                              Allocator};
+  llvm::FunctionCallee FiniRTLFn = createRuntimeFunction(OMPRTL__kmpc_free);
+
+  CGF.EHStack.pushCleanup<OMPAllocateCleanupTy>(NormalAndEHCleanup, FiniRTLFn,
+                                                llvm::makeArrayRef(FiniArgs));
+  Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
+      Addr,
+      CGF.ConvertTypeForMem(CGM.getContext().getPointerType(CVD->getType())),
+      CVD->getName() + ".addr");
+  return Address(Addr, Align);
 }
 
 llvm::Function *CGOpenMPSIMDRuntime::emitParallelOutlinedFunction(
index 7de160322692d7cbe74171fabdbfb3019118f26a..fd294dab64049ff0167adec62b3cb677474408d8 100644 (file)
@@ -4725,6 +4725,28 @@ void CGOpenMPRuntimeNVPTX::emitFunctionProlog(CodeGenFunction &CGF,
 
 Address CGOpenMPRuntimeNVPTX::getAddressOfLocalVariable(CodeGenFunction &CGF,
                                                         const VarDecl *VD) {
+  bool UseDefaultAllocator = true;
+  if (VD && VD->hasAttr<OMPAllocateDeclAttr>()) {
+    const auto *A = VD->getAttr<OMPAllocateDeclAttr>();
+    switch (A->getAllocatorType()) {
+      // Use the default allocator here as by default local vars are
+      // threadlocal.
+    case OMPAllocateDeclAttr::OMPDefaultMemAlloc:
+    case OMPAllocateDeclAttr::OMPThreadMemAlloc:
+      // Just pass-through to check if the globalization is required.
+      break;
+    case OMPAllocateDeclAttr::OMPLargeCapMemAlloc:
+    case OMPAllocateDeclAttr::OMPCGroupMemAlloc:
+    case OMPAllocateDeclAttr::OMPHighBWMemAlloc:
+    case OMPAllocateDeclAttr::OMPLowLatMemAlloc:
+    case OMPAllocateDeclAttr::OMPConstMemAlloc:
+    case OMPAllocateDeclAttr::OMPPTeamMemAlloc:
+    case OMPAllocateDeclAttr::OMPUserDefinedMemAlloc:
+      UseDefaultAllocator = false;
+      break;
+    }
+  }
+
   if (getDataSharingMode(CGM) != CGOpenMPRuntimeNVPTX::Generic)
     return Address::invalid();
 
@@ -4746,7 +4768,9 @@ Address CGOpenMPRuntimeNVPTX::getAddressOfLocalVariable(CodeGenFunction &CGF,
         return VDI->second.PrivateAddr;
     }
   }
+
   // TODO: replace it with return
+  // UseDefaultAllocator ? Address::invalid :
   // CGOpenMPRuntime::getAddressOfLocalVariable(CGF, VD); when NVPTX libomp
   // supports __kmpc_alloc|__kmpc_free.
   return Address::invalid();
index 6239ded455d35854e48ba8da4a32f10e45f3ed86..daad9353368b4444b9b4dfba4c5cd95d77053202 100644 (file)
@@ -67,25 +67,15 @@ int main () {
   static int a;
 #pragma omp allocate(a) allocator(omp_thread_mem_alloc)
   a=2;
-  // CHECK:      [[GTID:%.+]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @{{.+}})
-  // CHECK-NEXT: [[B_VOID_ADDR:%.+]] = call i8* @__kmpc_alloc(i32 [[GTID]], i64 8, i8** null)
-  // CHECK-NEXT: [[B_ADDR:%.+]] = bitcast i8* [[B_VOID_ADDR]] to double*
   // CHECK-NOT:  {{__kmpc_alloc|__kmpc_free}}
-  // CHECK:      store double 3.000000e+00, double* [[B_ADDR]],
-  // CHECK:      [[RES:%.+]] = call i32 [[FOO:@.+]]()
-  // CHECK:      store i32 [[RES]], i32* [[RET:%.+]],
-  // CHECK-NEXT: call void @__kmpc_free(i32 [[GTID]], i8* [[B_VOID_ADDR]], i8** null)
+  // CHECK:      alloca double,
   // CHECK-NOT:  {{__kmpc_alloc|__kmpc_free}}
   double b = 3;
 #pragma omp allocate(b)
-  // CHECK:      [[RETVAL:%.+]] = load i32, i32* [[RET]],
-  // CHECK:      ret i32 [[RETVAL]]
   return (foo<int>());
 }
 
-// CHECK-NOT:  call {{.+}} {{__kmpc_alloc|__kmpc_free}}
-
-// CHECK: define {{.*}}i32 [[FOO]]()
+// CHECK: define {{.*}}i32 @{{.+}}foo{{.+}}()
 // CHECK:      [[GTID:%.+]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @{{.+}})
 // CHECK-NEXT: [[OMP_CGROUP_MEM_ALLOC:%.+]] = load i8**, i8*** @omp_cgroup_mem_alloc,
 // CHECK-NEXT: [[V_VOID_ADDR:%.+]] = call i8* @__kmpc_alloc(i32 [[GTID]], i64 4, i8** [[OMP_CGROUP_MEM_ALLOC]])
index e9b9509334e7191045548777cbead9d459e3642c..df6a727c7acffd69cd01d3f2a53a3da2bd26dfea 100644 (file)
@@ -57,7 +57,9 @@ namespace ns{
 }
 #pragma omp allocate(ns::a) allocator(omp_pteam_mem_alloc)
 
+// CHECK-LABEL: @main
 int main () {
+  // CHECK: alloca double,
   static int a;
 #pragma omp allocate(a) allocator(omp_thread_mem_alloc)
   a=2;
@@ -66,6 +68,9 @@ int main () {
   return (foo<int>());
 }
 
+// CHECK: define {{.*}}i32 @{{.+}}foo{{.+}}()
+// CHECK: alloca i32,
+
 extern template int ST<int>::m;
 #pragma omp end declare target
 #endif