[OPENMP]Add codegen for firstprivate vars with allocate clause.

author Alexey Bataev <a.bataev@hotmail.com>

Wed, 3 Apr 2019 17:57:06 +0000 (17:57 +0000)

committer Alexey Bataev <a.bataev@hotmail.com>

Wed, 3 Apr 2019 17:57:06 +0000 (17:57 +0000)
author Alexey Bataev <a.bataev@hotmail.com>
Wed, 3 Apr 2019 17:57:06 +0000 (17:57 +0000)
committer Alexey Bataev <a.bataev@hotmail.com>
Wed, 3 Apr 2019 17:57:06 +0000 (17:57 +0000)
diff --git a/lib/CodeGen/CGDecl.cpp b/lib/CodeGen/CGDecl.cpp

index 8a871a04f0c7746026948cfd15b18b38cd2b7f0e..6f27d879a9980a1e66a44b17c5d832a9543fb426 100644 (file)
--- a/lib/CodeGen/CGDecl.cpp
+++ b/lib/CodeGen/CGDecl.cpp
@@ -1457,7 +1457,13 @@ CodeGenFunction::EmitAutoVarAlloca(const VarDecl &D) {
  
    Address address = Address::invalid();
    Address AllocaAddr = Address::invalid();
-  if (Ty->isConstantSizeType()) {
+  Address OpenMPLocalAddr =
+      getLangOpts().OpenMP
+          ? CGM.getOpenMPRuntime().getAddressOfLocalVariable(*this, &D)
+          : Address::invalid();
+  if (getLangOpts().OpenMP && OpenMPLocalAddr.isValid()) {
+    address = OpenMPLocalAddr;
+  } else if (Ty->isConstantSizeType()) {
      bool NRVO = getLangOpts().ElideConstructors &&
        D.isNRVOVariable();
  
@@ -1500,14 +1506,7 @@ CodeGenFunction::EmitAutoVarAlloca(const VarDecl &D) {
      // unless:
      // - it's an NRVO variable.
      // - we are compiling OpenMP and it's an OpenMP local variable.
-
-    Address OpenMPLocalAddr =
-        getLangOpts().OpenMP
-            ? CGM.getOpenMPRuntime().getAddressOfLocalVariable(*this, &D)
-            : Address::invalid();
-    if (getLangOpts().OpenMP && OpenMPLocalAddr.isValid()) {
-      address = OpenMPLocalAddr;
-    } else if (NRVO) {
+    if (NRVO) {
        // The named return value optimization: allocate this variable in the
        // return slot, so that we can elide the copy when returning this
        // variable (C++0x [class.copy]p34).
diff --git a/lib/CodeGen/CGOpenMPRuntime.cpp b/lib/CodeGen/CGOpenMPRuntime.cpp

index 65c5c1ca300bf3fac24688105029f7dd511dca03..67465c1acf0ab9d3714354cdc8699a7f505ac8e8 100644 (file)
--- a/lib/CodeGen/CGOpenMPRuntime.cpp
+++ b/lib/CodeGen/CGOpenMPRuntime.cpp
@@ -9768,10 +9768,13 @@ Address CGOpenMPRuntime::getAddressOfLocalVariable(CodeGenFunction &CGF,
    CharUnits Align = CGM.getContext().getDeclAlign(CVD);
    if (CVD->getType()->isVariablyModifiedType()) {
      Size = CGF.getTypeSize(CVD->getType());
-    Align = CGM.getContext().getTypeAlignInChars(CVD->getType());
+    // Align the size: ((size + align - 1) / align) * align
+    Size = CGF.Builder.CreateNUWAdd(
+        Size, CGM.getSize(Align - CharUnits::fromQuantity(1)));
+    Size = CGF.Builder.CreateUDiv(Size, CGM.getSize(Align));
+    Size = CGF.Builder.CreateNUWMul(Size, CGM.getSize(Align));
    } else {
      CharUnits Sz = CGM.getContext().getTypeSizeInChars(CVD->getType());
-    Align = CGM.getContext().getDeclAlign(CVD);
      Size = CGM.getSize(Sz.alignTo(Align));
    }
    llvm::Value *ThreadID = getThreadID(CGF, CVD->getBeginLoc());
diff --git a/lib/CodeGen/CGStmtOpenMP.cpp b/lib/CodeGen/CGStmtOpenMP.cpp

index e739a117bd88dbe7fc930471af3faea2c3b2a6b6..d27afcdd330681d867dfda9ee5af9a917590ccf8 100644 (file)
--- a/lib/CodeGen/CGStmtOpenMP.cpp
+++ b/lib/CodeGen/CGStmtOpenMP.cpp
@@ -750,8 +750,10 @@ bool CodeGenFunction::EmitOMPFirstprivateClause(const OMPExecutableDirective &D,
        bool ThisFirstprivateIsLastprivate =
            Lastprivates.count(OrigVD->getCanonicalDecl()) > 0;
        const FieldDecl *FD = CapturedStmtInfo->lookup(OrigVD);
+      const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(IInit)->getDecl());
        if (!MustEmitFirstprivateCopy && !ThisFirstprivateIsLastprivate && FD &&
-          !FD->getType()->isReferenceType()) {
+          !FD->getType()->isReferenceType() &&
+          (!VD || !VD->hasAttr<OMPAllocateDeclAttr>())) {
          EmittedAsFirstprivate.insert(OrigVD->getCanonicalDecl());
          ++IRef;
          ++InitsRef;
@@ -760,7 +762,8 @@ bool CodeGenFunction::EmitOMPFirstprivateClause(const OMPExecutableDirective &D,
        // Do not emit copy for firstprivate constant variables in target regions,
        // captured by reference.
        if (DeviceConstTarget && OrigVD->getType().isConstant(getContext()) &&
-          FD && FD->getType()->isReferenceType()) {
+          FD && FD->getType()->isReferenceType() &&
+          (!VD || !VD->hasAttr<OMPAllocateDeclAttr>())) {
          (void)CGM.getOpenMPRuntime().registerTargetFirstprivateCopy(*this,
                                                                      OrigVD);
          ++IRef;
@@ -770,7 +773,6 @@ bool CodeGenFunction::EmitOMPFirstprivateClause(const OMPExecutableDirective &D,
        FirstprivateIsLastprivate =
            FirstprivateIsLastprivate || ThisFirstprivateIsLastprivate;
        if (EmittedAsFirstprivate.insert(OrigVD->getCanonicalDecl()).second) {
-        const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(IInit)->getDecl());
          const auto *VDInit =
              cast<VarDecl>(cast<DeclRefExpr>(*InitsRef)->getDecl());
          bool IsRegistered;
diff --git a/test/OpenMP/parallel_firstprivate_codegen.cpp b/test/OpenMP/parallel_firstprivate_codegen.cpp

index 6772f61779ef25a53b05a076a9d7ae65461bd54e..e448a406e00454dfd156a7fe12820316ea17b59c 100644 (file)
--- a/test/OpenMP/parallel_firstprivate_codegen.cpp
+++ b/test/OpenMP/parallel_firstprivate_codegen.cpp
@@ -33,6 +33,16 @@
  #ifndef HEADER
  #define HEADER
  
+typedef void **omp_allocator_handle_t;
+extern const omp_allocator_handle_t omp_default_mem_alloc;
+extern const omp_allocator_handle_t omp_large_cap_mem_alloc;
+extern const omp_allocator_handle_t omp_const_mem_alloc;
+extern const omp_allocator_handle_t omp_high_bw_mem_alloc;
+extern const omp_allocator_handle_t omp_low_lat_mem_alloc;
+extern const omp_allocator_handle_t omp_cgroup_mem_alloc;
+extern const omp_allocator_handle_t omp_pteam_mem_alloc;
+extern const omp_allocator_handle_t omp_thread_mem_alloc;
+
  struct St {
    int a, b;
    St() : a(0), b(0) {}
@@ -322,7 +332,7 @@ int main() {
      s_arr[0] = var;
      sivar = 2;
    }
-#pragma omp parallel firstprivate(t_var)
+#pragma omp parallel allocate(omp_default_mem_alloc: t_var) firstprivate(t_var)
    {}
    return tmain<int>();
  #endif
@@ -333,6 +343,7 @@ int main() {
  // CHECK: [[T_VAR:%.+]] = alloca i32,
  // CHECK: [[T_VARCAST:%.+]] = alloca [[iz:i64|i32]],
  // CHECK: [[SIVARCAST:%.+]] = alloca [[iz]],
+// CHECK: [[T_VARCAST1:%.+]] = alloca [[iz:i64|i32]],
  // CHECK: call {{.*}} [[S_FLOAT_TY_DEF_CONSTR:@.+]]([[S_FLOAT_TY]]* [[TEST]])
  // CHECK: [[T_VARVAL:%.+]] = load i32, i32* [[T_VAR]],
  // CHECK-64: [[T_VARCONV:%.+]] = bitcast i64* [[T_VARCAST]] to i32*
@@ -345,6 +356,12 @@ int main() {
  // CHECK-32: store i32 [[SIVARVAL]], i32* [[SIVARCAST]],
  // CHECK: [[SIVARPVT:%.+]] = load [[iz]], [[iz]]* [[SIVARCAST]],
  // CHECK: call {{.*}}void (%{{.+}}*, i{{[0-9]+}}, void (i{{[0-9]+}}*, i{{[0-9]+}}*, ...)*, ...) @__kmpc_fork_call(%{{.+}}* @{{.+}}, i{{[0-9]+}} 5, void (i{{[0-9]+}}*, i{{[0-9]+}}*, ...)* bitcast (void (i{{[0-9]+}}*, i{{[0-9]+}}*, [2 x i32]*, [[iz]], [2 x [[S_FLOAT_TY]]]*, [[S_FLOAT_TY]]*, i{{[0-9]+}})* [[MAIN_MICROTASK:@.+]] to void {{.*}}[[iz]] [[T_VARPVT]],{{.*}}[[iz]] [[SIVARPVT]]
+// CHECK: [[T_VARVAL:%.+]] = load i32, i32* [[T_VAR]],
+// CHECK-64: [[T_VARCONV:%.+]] = bitcast i64* [[T_VARCAST1]] to i32*
+// CHECK-64: store i32 [[T_VARVAL]], i32* [[T_VARCONV]],
+// CHECK-32: store i32 [[T_VARVAL]], i32* [[T_VARCAST1]],
+// CHECK: [[T_VARPVT:%.+]] = load [[iz]], [[iz]]* [[T_VARCAST1]],
+// CHECK: call {{.*}}void (%{{.+}}*, i{{[0-9]+}}, void (i{{[0-9]+}}*, i{{[0-9]+}}*, ...)*, ...) @__kmpc_fork_call(%{{.+}}* @{{.+}}, i{{[0-9]+}} 1, void (i{{[0-9]+}}*, i{{[0-9]+}}*, ...)* bitcast (void (i{{[0-9]+}}*, i{{[0-9]+}}*, [[iz]])* [[MAIN_MICROTASK1:@.+]] to void {{.*}}[[iz]] [[T_VARPVT]])
  // CHECK: = call {{.*}}i{{.+}} [[TMAIN_INT:@.+]]()
  // CHECK: call {{.*}} [[S_FLOAT_TY_DESTR:@.+]]([[S_FLOAT_TY]]*
  // CHECK: ret
@@ -387,6 +404,24 @@ int main() {
  // CHECK-DAG: call {{.*}} [[S_FLOAT_TY_DESTR]]([[S_FLOAT_TY]]* [[VAR_PRIV]])
  // CHECK-DAG: call {{.*}} [[S_FLOAT_TY_DESTR]]([[S_FLOAT_TY]]*
  // CHECK: ret void
+
+
+// CHECK:    define internal void [[MAIN_MICROTASK1]](i{{[0-9]+}}* noalias [[GTID_ADDR:%.+]], i{{[0-9]+}}* noalias %{{.+}}, [[iz]] [[T_VAR:%.+]])
+// CHECK:    [[GTID_ADDR:%.+]] = alloca i32*,
+// CHECK:    store [[iz]] [[T_VAR]], [[iz]]* [[T_VAR_ADDR:%.+]],
+// CHECK-64: [[BC:%.+]] = bitcast [[iz]]* [[T_VAR_ADDR]] to i32*
+// CHECK:    [[GTID_PTR:%.+]] = load i32*, i32** [[GTID_ADDR]],
+// CHECK:    [[GTID:%.+]] = load i32, i32* [[GTID_PTR]],
+// CHECK:    [[ALLOCATOR:%.+]] = load i8**, i8*** @omp_default_mem_alloc,
+// CHECK:    [[T_VAR_VOID_PTR:%.+]] = call i8* @__kmpc_alloc(i32 [[GTID]], [[iz]] 4, i8** [[ALLOCATOR]])
+// CHECK:    [[T_VAR_PRIV:%.+]] = bitcast i8* [[T_VAR_VOID_PTR]] to i32*
+// CHECK-32: [[T_VAR_VAL:%.+]] = load i32, i32* [[T_VAR_ADDR]],
+// CHECK-64: [[T_VAR_VAL:%.+]] = load i32, i32* [[BC]],
+// CHECK:    store i32 [[T_VAR_VAL]], i32* [[T_VAR_PRIV]],
+// CHECK:    call void @__kmpc_free(i32 [[GTID]], i8* [[T_VAR_VOID_PTR]], i8** [[ALLOCATOR]])
+// CHECK:    ret void
+
+
  // CHECK: define {{.*}} i{{[0-9]+}} [[TMAIN_INT]]()
  // CHECK: [[TEST:%.+]] = alloca [[S_INT_TY]],
  // CHECK: call {{.*}} [[S_INT_TY_DEF_CONSTR:@.+]]([[S_INT_TY]]* [[TEST]])
@@ -480,6 +515,16 @@ int main() {
  
  #endif
  #else
+typedef void **omp_allocator_handle_t;
+extern const omp_allocator_handle_t omp_default_mem_alloc;
+extern const omp_allocator_handle_t omp_large_cap_mem_alloc;
+extern const omp_allocator_handle_t omp_const_mem_alloc;
+extern const omp_allocator_handle_t omp_high_bw_mem_alloc;
+extern const omp_allocator_handle_t omp_low_lat_mem_alloc;
+extern const omp_allocator_handle_t omp_cgroup_mem_alloc;
+extern const omp_allocator_handle_t omp_pteam_mem_alloc;
+extern const omp_allocator_handle_t omp_thread_mem_alloc;
+
  struct St {
    int a, b;
    St() : a(0), b(0) {}
@@ -488,7 +533,7 @@ struct St {
    void St_func(St s[2], int n, long double vla1[n]) {
      double vla2[n][n] __attribute__((aligned(128)));
      a = b;
-#pragma omp parallel firstprivate(s, vla1, vla2)
+#pragma omp parallel allocate(omp_thread_mem_alloc:vla2) firstprivate(s, vla1, vla2)
      vla1[b] = vla2[1][n - 1] = a = b;
    }
  };
@@ -521,9 +566,18 @@ void array_func(float a[3], St s[2], int n, long double vla1[n]) {
  // ARRAY-DAG: store %struct.St* %{{.+}}, %struct.St** [[PRIV_S]],
  // ARRAY-DAG: store x86_fp80* %{{.+}}, x86_fp80** [[PRIV_VLA1]],
  // ARRAY-DAG: store double* %{{.+}}, double** [[PRIV_VLA2]],
-// ARRAY: call i8* @llvm.stacksave()
  // ARRAY: [[SIZE:%.+]] = mul nuw i64 %{{.+}}, 8
-// ARRAY: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 128 %{{.+}}, i8* align 128 %{{.+}}, i64 [[SIZE]], i1 false)
+// ARRAY: [[SZ1:%.+]] = add nuw i64 [[SIZE]], 127
+// ARRAY: [[SZ2:%.+]] = udiv i64 [[SZ1]], 128
+// ARRAY: [[SIZE:%.+]] = mul nuw i64 [[SZ2]], 128
+// ARRAY: [[ALLOCATOR:%.+]] = load i8**, i8*** @omp_thread_mem_alloc,
+// ARRAY: [[VLA2_VOID_PTR:%.+]] = call i8* @__kmpc_alloc(i32 [[GTID:%.+]], i64 [[SIZE]], i8** [[ALLOCATOR]])
+// ARRAY: [[VLA2_PTR:%.+]] = bitcast i8* [[VLA2_VOID_PTR]] to double*
+// ARRAY: [[SIZE:%.+]] = mul nuw i64 %{{.+}}, 8
+// ARRAY: [[BC:%.+]] = bitcast double* [[VLA2_PTR]] to i8*
+// ARRAY: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 128 [[BC]], i8* align 128 %{{.+}}, i64 [[SIZE]], i1 false)
+// ARRAY: call void @__kmpc_free(i32 [[GTID]], i8* [[VLA2_VOID_PTR]], i8** [[ALLOCATOR]])
+// ARRAY-NEXT: ret void
  #endif
author	Alexey Bataev <a.bataev@hotmail.com>
	Wed, 3 Apr 2019 17:57:06 +0000 (17:57 +0000)
committer	Alexey Bataev <a.bataev@hotmail.com>
	Wed, 3 Apr 2019 17:57:06 +0000 (17:57 +0000)
lib/CodeGen/CGDecl.cpp		patch \| blob \| history
lib/CodeGen/CGOpenMPRuntime.cpp		patch \| blob \| history
lib/CodeGen/CGStmtOpenMP.cpp		patch \| blob \| history
test/OpenMP/parallel_firstprivate_codegen.cpp		patch \| blob \| history