From eeb951c67e02107341bcd6305cf33a7e2e7222fb Mon Sep 17 00:00:00 2001 From: Alexey Bataev Date: Tue, 14 Apr 2015 03:29:22 +0000 Subject: [PATCH] [OPENMP] Initial codegen for 'parallel sections' directive. Emits code for outlined 'parallel' directive with the implicitly inlined 'sections' directive: ... call __kmpc_fork_call(..., outlined_function, ...); ... define internal void outlined_function(...) { ; } Differential Revision: http://reviews.llvm.org/D8997 git-svn-id: https://llvm.org/svn/llvm-project/cfe/trunk@234849 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/CodeGen/CGStmtOpenMP.cpp | 54 ++++++++----- lib/CodeGen/CodeGenFunction.h | 9 ++- lib/Sema/SemaOpenMP.cpp | 4 + test/OpenMP/parallel_sections_codegen.cpp | 98 +++++++++++++++++++++++ 4 files changed, 139 insertions(+), 26 deletions(-) create mode 100644 test/OpenMP/parallel_sections_codegen.cpp diff --git a/lib/CodeGen/CGStmtOpenMP.cpp b/lib/CodeGen/CGStmtOpenMP.cpp index c8c0fd6ed2..fd475ba590 100644 --- a/lib/CodeGen/CGStmtOpenMP.cpp +++ b/lib/CodeGen/CGStmtOpenMP.cpp @@ -883,8 +883,8 @@ static LValue createSectionLVal(CodeGenFunction &CGF, QualType Ty, return LVal; } -void CodeGenFunction::EmitOMPSectionsDirective(const OMPSectionsDirective &S) { - LexicalScope Scope(*this, S.getSourceRange()); +static OpenMPDirectiveKind emitSections(CodeGenFunction &CGF, + const OMPExecutableDirective &S) { auto *Stmt = cast(S.getAssociatedStmt())->getCapturedStmt(); auto *CS = dyn_cast(Stmt); if (CS && CS->size() > 1) { @@ -904,9 +904,9 @@ void CodeGenFunction::EmitOMPSectionsDirective(const OMPSectionsDirective &S) { // Loop counter. LValue IV = createSectionLVal(CGF, KmpInt32Ty, ".omp.sections.iv."); OpaqueValueExpr IVRefExpr(S.getLocStart(), KmpInt32Ty, VK_LValue); - OpaqueValueMapping OpaqueIV(CGF, &IVRefExpr, IV); + CodeGenFunction::OpaqueValueMapping OpaqueIV(CGF, &IVRefExpr, IV); OpaqueValueExpr UBRefExpr(S.getLocStart(), KmpInt32Ty, VK_LValue); - OpaqueValueMapping OpaqueUB(CGF, &UBRefExpr, UB); + CodeGenFunction::OpaqueValueMapping OpaqueUB(CGF, &UBRefExpr, UB); // Generate condition for loop. BinaryOperator Cond(&IVRefExpr, &UBRefExpr, BO_LE, C.BoolTy, VK_RValue, OK_Ordinary, S.getLocStart(), @@ -959,26 +959,27 @@ void CodeGenFunction::EmitOMPSectionsDirective(const OMPSectionsDirective &S) { OMPC_SCHEDULE_static); }; - CGM.getOpenMPRuntime().emitInlinedDirective(*this, CodeGen); - } else { - // If only one section is found - no need to generate loop, emit as a - // single - // region. - auto &&CodeGen = [&S](CodeGenFunction &CGF) { - CGF.EmitStmt( - cast(S.getAssociatedStmt())->getCapturedStmt()); - CGF.EnsureInsertPoint(); - }; - CGM.getOpenMPRuntime().emitSingleRegion(*this, CodeGen, S.getLocStart(), - llvm::None, llvm::None, llvm::None, - llvm::None); + CGF.CGM.getOpenMPRuntime().emitInlinedDirective(CGF, CodeGen); + return OMPD_sections; } + // If only one section is found - no need to generate loop, emit as a single + // region. + auto &&CodeGen = [Stmt](CodeGenFunction &CGF) { + CGF.EmitStmt(Stmt); + CGF.EnsureInsertPoint(); + }; + CGF.CGM.getOpenMPRuntime().emitSingleRegion(CGF, CodeGen, S.getLocStart(), + llvm::None, llvm::None, + llvm::None, llvm::None); + return OMPD_single; +} +void CodeGenFunction::EmitOMPSectionsDirective(const OMPSectionsDirective &S) { + LexicalScope Scope(*this, S.getSourceRange()); + OpenMPDirectiveKind EmittedAs = emitSections(*this, S); // Emit an implicit barrier at the end. if (!S.getSingleClause(OMPC_nowait)) { - CGM.getOpenMPRuntime().emitBarrierCall( - *this, S.getLocStart(), - (CS && CS->size() > 1) ? OMPD_sections : OMPD_single); + CGM.getOpenMPRuntime().emitBarrierCall(*this, S.getLocStart(), EmittedAs); } } @@ -1071,8 +1072,17 @@ void CodeGenFunction::EmitOMPParallelForSimdDirective( } void CodeGenFunction::EmitOMPParallelSectionsDirective( - const OMPParallelSectionsDirective &) { - llvm_unreachable("CodeGen for 'omp parallel sections' is not supported yet."); + const OMPParallelSectionsDirective &S) { + // Emit directive as a combined directive that consists of two implicit + // directives: 'parallel' with 'sections' directive. + LexicalScope Scope(*this, S.getSourceRange()); + auto &&CodeGen = [&S](CodeGenFunction &CGF) { + (void)emitSections(CGF, S); + // Emit implicit barrier at the end of parallel region. + CGF.CGM.getOpenMPRuntime().emitBarrierCall(CGF, S.getLocStart(), + OMPD_parallel); + }; + emitCommonOMPParallelDirective(*this, S, CodeGen); } void CodeGenFunction::EmitOMPTaskDirective(const OMPTaskDirective &S) { diff --git a/lib/CodeGen/CodeGenFunction.h b/lib/CodeGen/CodeGenFunction.h index 087acc7317..6ba3db0ad7 100644 --- a/lib/CodeGen/CodeGenFunction.h +++ b/lib/CodeGen/CodeGenFunction.h @@ -2085,15 +2085,16 @@ public: void EmitOMPTargetDirective(const OMPTargetDirective &S); void EmitOMPTeamsDirective(const OMPTeamsDirective &S); + void + EmitOMPInnerLoop(const Stmt &S, bool RequiresCleanup, const Expr *LoopCond, + const Expr *IncExpr, + const llvm::function_ref &BodyGen); + private: /// Helpers for the OpenMP loop directives. void EmitOMPLoopBody(const OMPLoopDirective &Directive, bool SeparateIter = false); - void - EmitOMPInnerLoop(const Stmt &S, bool RequiresCleanup, const Expr *LoopCond, - const Expr *IncExpr, - const llvm::function_ref &BodyGen); void EmitOMPSimdFinal(const OMPLoopDirective &S); void EmitOMPWorksharingLoop(const OMPLoopDirective &S); void EmitOMPForOuterLoop(OpenMPScheduleClauseKind ScheduleKind, diff --git a/lib/Sema/SemaOpenMP.cpp b/lib/Sema/SemaOpenMP.cpp index bc4d2f56c8..d42152919f 100644 --- a/lib/Sema/SemaOpenMP.cpp +++ b/lib/Sema/SemaOpenMP.cpp @@ -1117,7 +1117,11 @@ void Sema::ActOnOpenMPRegionStart(OpenMPDirectiveKind DKind, Scope *CurScope) { break; } case OMPD_parallel_sections: { + QualType KmpInt32Ty = Context.getIntTypeForBitwidth(32, 1); + QualType KmpInt32PtrTy = Context.getPointerType(KmpInt32Ty); Sema::CapturedParamNameType Params[] = { + std::make_pair(".global_tid.", KmpInt32PtrTy), + std::make_pair(".bound_tid.", KmpInt32PtrTy), std::make_pair(StringRef(), QualType()) // __context with shared vars }; ActOnCapturedRegionStart(DSAStack->getConstructLoc(), CurScope, CR_OpenMP, diff --git a/test/OpenMP/parallel_sections_codegen.cpp b/test/OpenMP/parallel_sections_codegen.cpp new file mode 100644 index 0000000000..825d5a3bc1 --- /dev/null +++ b/test/OpenMP/parallel_sections_codegen.cpp @@ -0,0 +1,98 @@ +// RUN: %clang_cc1 -verify -fopenmp=libiomp5 -x c++ -emit-llvm -triple x86_64-unknown-unknown -fexceptions -fcxx-exceptions -o - %s | FileCheck %s +// RUN: %clang_cc1 -fopenmp=libiomp5 -x c++ -std=c++11 -fexceptions -fcxx-exceptions -triple x86_64-unknown-unknown -emit-pch -o %t %s +// RUN: %clang_cc1 -fopenmp=libiomp5 -x c++ -std=c++11 -include-pch %t -fsyntax-only -verify %s -triple x86_64-unknown-unknown -fexceptions -fcxx-exceptions -emit-llvm -o - | FileCheck %s +// expected-no-diagnostics + +#ifndef HEADER +#define HEADER +// CHECK: [[IMPLICIT_BARRIER_LOC:@.+]] = private unnamed_addr constant %{{.+}} { i32 0, i32 66, i32 0, i32 0, i8* +// CHECK-LABEL: foo +void foo() {}; +// CHECK-LABEL: bar +void bar() {}; + +template +T tmain() { +#pragma omp parallel sections + { + foo(); + } + return T(); +} + +// CHECK-LABEL: @main +int main() { +// CHECK: call void (%{{.+}}*, i32, void (i32*, i32*, ...)*, ...)* @__kmpc_fork_call(%{{.+}}* @{{.+}}, i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %{{.+}}*)* [[OMP_PARALLEL_FUNC:@.+]] to void (i32*, i32*, ...)*), i8* %{{.+}}) +// CHECK-LABEL: } +// CHECK: define internal void [[OMP_PARALLEL_FUNC]](i32* [[GTID_PARAM_ADDR:%.+]], i32* %{{.+}}, %{{.+}}* %{{.+}}) +// CHECK: store i32* [[GTID_PARAM_ADDR]], i32** [[GTID_REF_ADDR:%.+]], +#pragma omp parallel sections + { +// CHECK: store i32 0, i32* [[LB_PTR:%.+]], +// CHECK: store i32 1, i32* [[UB_PTR:%.+]], +// CHECK: [[GTID_REF:%.+]] = load i32*, i32** [[GTID_REF_ADDR]], +// CHECK: [[GTID:%.+]] = load i32, i32* [[GTID_REF]], +// CHECK: call void @__kmpc_for_static_init_4(%{{.+}}* @{{.+}}, i32 [[GTID]], i32 34, i32* [[IS_LAST_PTR:%.+]], i32* [[LB_PTR]], i32* [[UB_PTR]], i32* [[STRIDE_PTR:%.+]], i32 1, i32 1) +// <> +// CHECK: [[UB:%.+]] = load i32, i32* [[UB_PTR]] +// CHECK: [[CMP:%.+]] = icmp slt i32 [[UB]], 1 +// CHECK: [[MIN_UB_GLOBALUB:%.+]] = select i1 [[CMP]], i32 [[UB]], i32 1 +// CHECK: store i32 [[MIN_UB_GLOBALUB]], i32* [[UB_PTR]] +// <> +// CHECK: [[LB:%.+]] = load i32, i32* [[LB_PTR]] +// CHECK: store i32 [[LB]], i32* [[IV_PTR:%.+]] +// CHECK: br label %[[INNER_FOR_COND:.+]] +// CHECK: [[INNER_FOR_COND]] +// <> +// CHECK: [[IV:%.+]] = load i32, i32* [[IV_PTR]] +// CHECK: [[UB:%.+]] = load i32, i32* [[UB_PTR]] +// CHECK: [[CMP:%.+]] = icmp sle i32 [[IV]], [[UB]] +// CHECK: br i1 [[CMP]], label %[[INNER_LOOP_BODY:.+]], label %[[INNER_LOOP_END:.+]] +// CHECK: [[INNER_LOOP_BODY]] +// <> - > +// CHECK: [[IV:%.+]] = load i32, i32* [[IV_PTR]] +// CHECK: switch i32 [[IV]], label %[[SECTIONS_EXIT:.+]] [ +// CHECK-NEXT: i32 0, label %[[SECTIONS_CASE0:.+]] +// CHECK-NEXT: i32 1, label %[[SECTIONS_CASE1:.+]] +#pragma omp section +// CHECK: [[SECTIONS_CASE0]] +// CHECK-NEXT: invoke void @{{.*}}foo{{.*}}() +// CHECK: br label %[[SECTIONS_EXIT]] + foo(); +#pragma omp section +// CHECK: [[SECTIONS_CASE1]] +// CHECK-NEXT: invoke void @{{.*}}bar{{.*}}() +// CHECK: br label %[[SECTIONS_EXIT]] + bar(); +// CHECK: [[SECTIONS_EXIT]] +// <<++IV;>> +// CHECK: [[IV:%.+]] = load i32, i32* [[IV_PTR]] +// CHECK-NEXT: [[INC:%.+]] = add nsw i32 [[IV]], 1 +// CHECK-NEXT: store i32 [[INC]], i32* [[IV_PTR]] +// CHECK-NEXT: br label %[[INNER_FOR_COND]] +// CHECK: [[INNER_LOOP_END]] + } +// CHECK: call void @__kmpc_for_static_fini(%{{.+}}* @{{.+}}, i32 [[GTID]]) +// CHECK: call i32 @__kmpc_cancel_barrier(%{{.+}}* [[IMPLICIT_BARRIER_LOC]], + return tmain(); +} + +// CHECK-LABEL: tmain +// CHECK: call void {{.*}} @__kmpc_fork_call( +// CHECK-NOT: __kmpc_global_thread_num +// CHECK: [[RES:%.+]] = call i32 @__kmpc_single( +// CHECK-NEXT: [[BOOLRES:%.+]] = icmp ne i32 [[RES]], 0 +// CHECK-NEXT: br i1 [[BOOLRES]], label %[[THEN:.+]], label %[[END:.+]] +// CHECK: [[THEN]] +// CHECK-NEXT: invoke void @{{.*}}foo{{.*}}() +// CHECK-NEXT: unwind label %[[TERM_LPAD:.+]] +// CHECK: call void @__kmpc_end_single( +// CHECK-NEXT: br label %[[END]] +// CHECK: [[END]] +// CHECK-NEXT: call i32 @__kmpc_cancel_barrier(%{{.+}}* [[IMPLICIT_BARRIER_LOC]], +// CHECK-NEXT: ret +// CHECK: [[TERM_LPAD]] +// CHECK: call void @__clang_call_terminate(i8* +// CHECK-NEXT: unreachable + +#endif -- 2.40.0