From: Alexey Bataev Date: Wed, 21 Nov 2018 19:41:10 +0000 (+0000) Subject: [OPENMP]Fix handling of the LCVs in loop-based directives. X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=0551d7316e608d3cc3efd69418258b69ac2843c6;p=clang [OPENMP]Fix handling of the LCVs in loop-based directives. Loop-control variables with the default data-sharing attributes should not be captured in the OpenMP region as they are private by default. Also, default attributes should be emitted for such variables in the inner OpenMP regions for the correct data sharing during codegen. git-svn-id: https://llvm.org/svn/llvm-project/cfe/trunk@347409 91177308-0d34-0410-b5e6-96231b3b80d8 --- diff --git a/lib/Sema/SemaOpenMP.cpp b/lib/Sema/SemaOpenMP.cpp index a2a935944c..84c3023081 100644 --- a/lib/Sema/SemaOpenMP.cpp +++ b/lib/Sema/SemaOpenMP.cpp @@ -1275,10 +1275,16 @@ bool DSAStackTy::hasExplicitDSA( return false; std::advance(StartI, Level); auto I = StartI->SharingMap.find(D); - return (I != StartI->SharingMap.end()) && + if ((I != StartI->SharingMap.end()) && I->getSecond().RefExpr.getPointer() && CPred(I->getSecond().Attributes) && - (!NotLastprivate || !I->getSecond().RefExpr.getInt()); + (!NotLastprivate || !I->getSecond().RefExpr.getInt())) + return true; + // Check predetermined rules for the loop control variables. + auto LI = StartI->LCVMap.find(D); + if (LI != StartI->LCVMap.end()) + return CPred(OMPC_private); + return false; } bool DSAStackTy::hasExplicitDirective( diff --git a/lib/Sema/TreeTransform.h b/lib/Sema/TreeTransform.h index e99d75cdcf..4011ec251c 100644 --- a/lib/Sema/TreeTransform.h +++ b/lib/Sema/TreeTransform.h @@ -6770,6 +6770,9 @@ TreeTransform::TransformDoStmt(DoStmt *S) { template StmtResult TreeTransform::TransformForStmt(ForStmt *S) { + if (getSema().getLangOpts().OpenMP) + getSema().startOpenMPLoop(); + // Transform the initialization statement StmtResult Init = getDerived().TransformStmt(S->getInit()); if (Init.isInvalid()) diff --git a/test/OpenMP/nvptx_target_teams_distribute_codegen.cpp b/test/OpenMP/nvptx_target_teams_distribute_codegen.cpp new file mode 100644 index 0000000000..ba99d17bdc --- /dev/null +++ b/test/OpenMP/nvptx_target_teams_distribute_codegen.cpp @@ -0,0 +1,90 @@ +// Test target codegen - host bc file has to be created first. +// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm-bc %s -o %t-ppc-host.bc +// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple nvptx64-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-64 +// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple i386-unknown-unknown -fopenmp-targets=nvptx-nvidia-cuda -emit-llvm-bc %s -o %t-x86-host.bc +// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple nvptx-unknown-unknown -fopenmp-targets=nvptx-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-32 +// RUN: %clang_cc1 -verify -fopenmp -fexceptions -fcxx-exceptions -x c++ -triple nvptx-unknown-unknown -fopenmp-targets=nvptx-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-32 +// expected-no-diagnostics +#ifndef HEADER +#define HEADER + +// CHECK: [[MEM_TY:%.+]] = type { [4 x i8] } +// CHECK-DAG: {{@__omp_offloading_.+}}_l19_exec_mode = weak constant i8 1 +// CHECK-DAG: internal unnamed_addr constant i{{64|32}} 4 + +template +tx ftemplate(int n) { + int i; + + #pragma omp target teams distribute + for (i = 0; i < 10; ++i) + { +#pragma omp parallel + ++i; + } + + return i; +} + +int bar(int n){ + int a = 0; + + a += ftemplate(n); + + return a; +} + + // CHECK-LABEL: define {{.*}}void {{@__omp_offloading_.+template.+l19}}_worker() + // CHECK: ret void + + // CHECK: define {{.*}}void {{@__omp_offloading_.+template.+l19}}() + + // CHECK-DAG: [[TID:%.+]] = call i32 @llvm.nvvm.read.ptx.sreg.tid.x() + // CHECK-DAG: [[NTH:%.+]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() + // CHECK-DAG: [[WS:%.+]] = call i32 @llvm.nvvm.read.ptx.sreg.warpsize() + // CHECK-DAG: [[TH_LIMIT:%.+]] = sub nuw i32 [[NTH]], [[WS]] + // CHECK: [[IS_WORKER:%.+]] = icmp ult i32 [[TID]], [[TH_LIMIT]] + // CHECK: br i1 [[IS_WORKER]], label {{%?}}[[WORKER:.+]], label {{%?}}[[CHECK_MASTER:.+]] + // + // CHECK: [[WORKER]] + // CHECK: {{call|invoke}} void {{@__omp_offloading_.+template.+l19}}_worker() + // CHECK: br label {{%?}}[[EXIT:.+]] + // + // CHECK: [[CHECK_MASTER]] + // CHECK-DAG: [[CMTID:%.+]] = call i32 @llvm.nvvm.read.ptx.sreg.tid.x() + // CHECK-DAG: [[CMNTH:%.+]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() + // CHECK-DAG: [[CMWS:%.+]] = call i32 @llvm.nvvm.read.ptx.sreg.warpsize() + // CHECK: [[IS_MASTER:%.+]] = icmp eq i32 [[CMTID]], + // CHECK: br i1 [[IS_MASTER]], label {{%?}}[[MASTER:.+]], label {{%?}}[[EXIT]] + // + // CHECK: [[MASTER]] + // CHECK-DAG: [[MNTH:%.+]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() + // CHECK-DAG: [[MWS:%.+]] = call i32 @llvm.nvvm.read.ptx.sreg.warpsize() + // CHECK: [[MTMP1:%.+]] = sub nuw i32 [[MNTH]], [[MWS]] + // CHECK: call void @__kmpc_kernel_init(i32 [[MTMP1]] + // CHECK: call void @__kmpc_get_team_static_memory(i8* addrspacecast (i8 addrspace(3)* getelementptr inbounds ([[MEM_TY]], [[MEM_TY]] addrspace(3)* @{{.+}}, i32 0, i32 0, i32 0) to i8*), i{{64|32}} 4, i16 1, i8** addrspacecast (i8* addrspace(3)* [[BUF:@.+]] to i8**)) + // CHECK: [[PTR:%.+]] = load i8*, i8* addrspace(3)* [[BUF]], + // CHECK: [[RD:%.+]] = bitcast i8* [[PTR]] to [[GLOB_TY:%.+]]* + // CHECK: [[I_ADDR:%.+]] = getelementptr inbounds [[GLOB_TY]], [[GLOB_TY]]* [[RD]], i32 0, i32 0 + // + // CHECK: call void @__kmpc_for_static_init_4( + // CHECK: call void @__kmpc_kernel_prepare_parallel(i8* bitcast (void (i16, i32)* @{{.+}} to i8*), i16 1) + // CHECK: call void @__kmpc_begin_sharing_variables(i8*** [[SHARED_VARS_PTR:%.+]], i{{64|32}} 1) + // CHECK: [[SHARED_VARS_BUF:%.+]] = load i8**, i8*** [[SHARED_VARS_PTR]], + // CHECK: [[I_ADDR_BC:%.+]] = bitcast i32* [[I_ADDR]] to i8* + // CHECK: store i8* [[I_ADDR_BC]], i8** [[SHARED_VARS_BUF]], + // CHECK: call void @llvm.nvvm.barrier0() + // CHECK: call void @llvm.nvvm.barrier0() + // CHECK: call void @__kmpc_end_sharing_variables() + // CHECK: call void @__kmpc_for_static_fini( + // CHECK: br label {{%?}}[[TERMINATE:.+]] + // + // CHECK: [[TERMINATE]] + // CHECK: call void @__kmpc_kernel_deinit( + // CHECK: call void @llvm.nvvm.barrier0() + // CHECK: br label {{%?}}[[EXIT]] + // + // CHECK: [[EXIT]] + // CHECK: ret void + +#endif