From: Alexey Bataev Date: Thu, 12 Mar 2015 08:53:29 +0000 (+0000) Subject: [OPENMP] Initial codegen for 'omp sections' and 'omp section' directives. X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=e4fa81e9d384fe0e095f8055afc65c1d7f013abc;p=clang [OPENMP] Initial codegen for 'omp sections' and 'omp section' directives. If only one section is found in the sections region, it is emitted just like single region. Otherwise it is emitted as a static non-chunked loop. #pragma omp sections { #pragma omp section {1} ... #pragma omp section {n} } is translated to something like i32 i32 = 0 i32 = 0 i32 = n-1 i32 = 1 call void @__kmpc_for_static_init_4(, i32 , i32 34/*static non-chunked*/, i32* , i32* , i32* , i32* , i32 1/*increment always 1*/, i32 1/*chunk always 1*/) = min(, n-1) = check: br <= , label cont, label exit continue: switch (IV) { case 0: {1}; break; ... case - 1: {n}; break; } ++ br label check exit: call void @__kmpc_for_static_fini(, i32 ) Differential Revision: http://reviews.llvm.org/D8244 git-svn-id: https://llvm.org/svn/llvm-project/cfe/trunk@232021 91177308-0d34-0410-b5e6-96231b3b80d8 --- diff --git a/lib/CodeGen/CGStmtOpenMP.cpp b/lib/CodeGen/CGStmtOpenMP.cpp index ed6815550f..0a82adf965 100644 --- a/lib/CodeGen/CGStmtOpenMP.cpp +++ b/lib/CodeGen/CGStmtOpenMP.cpp @@ -282,9 +282,10 @@ void CodeGenFunction::EmitOMPLoopBody(const OMPLoopDirective &S, } } -void CodeGenFunction::EmitOMPInnerLoop(const OMPLoopDirective &S, - OMPPrivateScope &LoopScope, - bool SeparateIter) { +void CodeGenFunction::EmitOMPInnerLoop(const Stmt &S, bool RequiresCleanup, + const Expr *LoopCond, + const Expr *IncExpr, + const std::function &BodyGen) { auto LoopExit = getJumpDestInCurrentScope("omp.inner.for.end"); auto Cnt = getPGORegionCounter(&S); @@ -296,17 +297,13 @@ void CodeGenFunction::EmitOMPInnerLoop(const OMPLoopDirective &S, // If there are any cleanups between here and the loop-exit scope, // create a block to stage a loop exit along. auto ExitBlock = LoopExit.getBlock(); - if (LoopScope.requiresCleanups()) + if (RequiresCleanup) ExitBlock = createBasicBlock("omp.inner.for.cond.cleanup"); auto LoopBody = createBasicBlock("omp.inner.for.body"); - // Emit condition: "IV < LastIteration + 1 [ - 1]" - // ("- 1" when lastprivate clause is present - separate one iteration). - llvm::Value *BoolCondVal = EvaluateExprAsBool(S.getCond(SeparateIter)); - Builder.CreateCondBr(BoolCondVal, LoopBody, ExitBlock, - PGO.createLoopWeights(S.getCond(SeparateIter), Cnt)); - + // Emit condition. + EmitBranchOnBoolExpr(LoopCond, LoopBody, ExitBlock, Cnt.getCount()); if (ExitBlock != LoopExit.getBlock()) { EmitBlock(ExitBlock); EmitBranchThroughCleanup(LoopExit); @@ -319,12 +316,11 @@ void CodeGenFunction::EmitOMPInnerLoop(const OMPLoopDirective &S, auto Continue = getJumpDestInCurrentScope("omp.inner.for.inc"); BreakContinueStack.push_back(BreakContinue(LoopExit, Continue)); - EmitOMPLoopBody(S); - EmitStopPoint(&S); + BodyGen(); // Emit "IV = IV + 1" and a back-edge to the condition block. EmitBlock(Continue.getBlock()); - EmitIgnoredExpr(S.getInc()); + EmitIgnoredExpr(IncExpr); BreakContinueStack.pop_back(); EmitBranch(CondBlock); LoopStack.pop(); @@ -460,7 +456,12 @@ void CodeGenFunction::EmitOMPSimdDirective(const OMPSimdDirective &S) { { OMPPrivateScope LoopScope(*this); EmitPrivateLoopCounters(*this, LoopScope, S.counters()); - EmitOMPInnerLoop(S, LoopScope, /* SeparateIter */ true); + EmitOMPInnerLoop(S, LoopScope.requiresCleanups(), + S.getCond(/*SeparateIter=*/true), S.getInc(), + [&S, this]() { + EmitOMPLoopBody(S); + EmitStopPoint(&S); + }); EmitOMPLoopBody(S, /* SeparateIter */ true); } EmitOMPSimdFinal(S); @@ -471,7 +472,12 @@ void CodeGenFunction::EmitOMPSimdDirective(const OMPSimdDirective &S) { { OMPPrivateScope LoopScope(*this); EmitPrivateLoopCounters(*this, LoopScope, S.counters()); - EmitOMPInnerLoop(S, LoopScope); + EmitOMPInnerLoop(S, LoopScope.requiresCleanups(), + S.getCond(/*SeparateIter=*/false), S.getInc(), + [&S, this]() { + EmitOMPLoopBody(S); + EmitStopPoint(&S); + }); } EmitOMPSimdFinal(S); } @@ -543,7 +549,11 @@ void CodeGenFunction::EmitOMPForOuterLoop(OpenMPScheduleClauseKind ScheduleKind, auto Continue = getJumpDestInCurrentScope("omp.dispatch.inc"); BreakContinueStack.push_back(BreakContinue(LoopExit, Continue)); - EmitOMPInnerLoop(S, LoopScope); + EmitOMPInnerLoop(S, LoopScope.requiresCleanups(), + S.getCond(/*SeparateIter=*/false), S.getInc(), [&S, this]() { + EmitOMPLoopBody(S); + EmitStopPoint(&S); + }); EmitBlock(Continue.getBlock()); BreakContinueStack.pop_back(); @@ -638,7 +648,12 @@ void CodeGenFunction::EmitOMPWorksharingLoop(const OMPLoopDirective &S) { // IV = LB; EmitIgnoredExpr(S.getInit()); // while (idx <= UB) { BODY; ++idx; } - EmitOMPInnerLoop(S, LoopScope); + EmitOMPInnerLoop(S, LoopScope.requiresCleanups(), + S.getCond(/*SeparateIter=*/false), S.getInc(), + [&S, this]() { + EmitOMPLoopBody(S); + EmitStopPoint(&S); + }); // Tell the runtime we are done. RT.emitForFinish(*this, S.getLocStart(), ScheduleKind); } else { @@ -669,12 +684,108 @@ void CodeGenFunction::EmitOMPForSimdDirective(const OMPForSimdDirective &) { llvm_unreachable("CodeGen for 'omp for simd' is not supported yet."); } -void CodeGenFunction::EmitOMPSectionsDirective(const OMPSectionsDirective &) { - llvm_unreachable("CodeGen for 'omp sections' is not supported yet."); +static LValue createSectionLVal(CodeGenFunction &CGF, QualType Ty, + const Twine &Name, + llvm::Value *Init = nullptr) { + auto LVal = CGF.MakeNaturalAlignAddrLValue(CGF.CreateMemTemp(Ty, Name), Ty); + if (Init) + CGF.EmitScalarInit(Init, LVal); + return LVal; +} + +void CodeGenFunction::EmitOMPSectionsDirective(const OMPSectionsDirective &S) { + InlinedOpenMPRegionScopeRAII Region(*this, S); + + auto *Stmt = cast(S.getAssociatedStmt())->getCapturedStmt(); + auto *CS = dyn_cast(Stmt); + if (CS && CS->size() > 1) { + auto &C = CGM.getContext(); + auto KmpInt32Ty = C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1); + // Emit helper vars inits. + LValue LB = createSectionLVal(*this, KmpInt32Ty, ".omp.sections.lb.", + Builder.getInt32(0)); + auto *GlobalUBVal = Builder.getInt32(CS->size() - 1); + LValue UB = + createSectionLVal(*this, KmpInt32Ty, ".omp.sections.ub.", GlobalUBVal); + LValue ST = createSectionLVal(*this, KmpInt32Ty, ".omp.sections.st.", + Builder.getInt32(1)); + LValue IL = createSectionLVal(*this, KmpInt32Ty, ".omp.sections.il.", + Builder.getInt32(0)); + // Loop counter. + LValue IV = createSectionLVal(*this, KmpInt32Ty, ".omp.sections.iv."); + OpaqueValueExpr IVRefExpr(S.getLocStart(), KmpInt32Ty, VK_LValue); + OpaqueValueMapping OpaqueIV(*this, &IVRefExpr, IV); + OpaqueValueExpr UBRefExpr(S.getLocStart(), KmpInt32Ty, VK_LValue); + OpaqueValueMapping OpaqueUB(*this, &UBRefExpr, UB); + // Generate condition for loop. + BinaryOperator Cond(&IVRefExpr, &UBRefExpr, BO_LE, C.BoolTy, VK_RValue, + OK_Ordinary, S.getLocStart(), /*fpContractable=*/false); + // Increment for loop counter. + UnaryOperator Inc(&IVRefExpr, UO_PreInc, KmpInt32Ty, VK_RValue, OK_Ordinary, + S.getLocStart()); + auto BodyGen = [this, CS, &S, &IV]() { + // Iterate through all sections and emit a switch construct: + // switch (IV) { + // case 0: + // ; + // break; + // ... + // case - 1: + // - 1]>; + // break; + // } + // .omp.sections.exit: + auto *ExitBB = createBasicBlock(".omp.sections.exit"); + auto *SwitchStmt = Builder.CreateSwitch( + EmitLoadOfLValue(IV, S.getLocStart()).getScalarVal(), ExitBB, + CS->size()); + unsigned CaseNumber = 0; + for (auto C = CS->children(); C; ++C, ++CaseNumber) { + auto CaseBB = createBasicBlock(".omp.sections.case"); + EmitBlock(CaseBB); + SwitchStmt->addCase(Builder.getInt32(CaseNumber), CaseBB); + EmitStmt(*C); + EmitBranch(ExitBB); + } + EmitBlock(ExitBB, /*IsFinished=*/true); + }; + // Emit static non-chunked loop. + CGM.getOpenMPRuntime().emitForInit( + *this, S.getLocStart(), OMPC_SCHEDULE_static, /*IVSize=*/32, + /*IVSigned=*/true, IL.getAddress(), LB.getAddress(), UB.getAddress(), + ST.getAddress()); + // UB = min(UB, GlobalUB); + auto *UBVal = EmitLoadOfScalar(UB, S.getLocStart()); + auto *MinUBGlobalUB = Builder.CreateSelect( + Builder.CreateICmpSLT(UBVal, GlobalUBVal), UBVal, GlobalUBVal); + EmitStoreOfScalar(MinUBGlobalUB, UB); + // IV = LB; + EmitStoreOfScalar(EmitLoadOfScalar(LB, S.getLocStart()), IV); + // while (idx <= UB) { BODY; ++idx; } + EmitOMPInnerLoop(S, /*RequiresCleanup=*/false, &Cond, &Inc, BodyGen); + // Tell the runtime we are done. + CGM.getOpenMPRuntime().emitForFinish(*this, S.getLocStart(), + OMPC_SCHEDULE_static); + } else { + // If only one section is found - no need to generate loop, emit as a single + // region. + CGM.getOpenMPRuntime().emitSingleRegion(*this, [&]() -> void { + InlinedOpenMPRegionScopeRAII Region(*this, S); + EmitStmt(Stmt); + EnsureInsertPoint(); + }, S.getLocStart()); + } + + // Emit an implicit barrier at the end. + if (!S.getSingleClause(OMPC_nowait)) + CGM.getOpenMPRuntime().emitBarrierCall(*this, S.getLocStart(), + /*IsExplicit=*/false); } -void CodeGenFunction::EmitOMPSectionDirective(const OMPSectionDirective &) { - llvm_unreachable("CodeGen for 'omp section' is not supported yet."); +void CodeGenFunction::EmitOMPSectionDirective(const OMPSectionDirective &S) { + InlinedOpenMPRegionScopeRAII Region(*this, S); + EmitStmt(cast(S.getAssociatedStmt())->getCapturedStmt()); + EnsureInsertPoint(); } void CodeGenFunction::EmitOMPSingleDirective(const OMPSingleDirective &S) { diff --git a/lib/CodeGen/CodeGenFunction.h b/lib/CodeGen/CodeGenFunction.h index 2103766728..4c62a4914c 100644 --- a/lib/CodeGen/CodeGenFunction.h +++ b/lib/CodeGen/CodeGenFunction.h @@ -2094,8 +2094,9 @@ private: /// Helpers for the OpenMP loop directives. void EmitOMPLoopBody(const OMPLoopDirective &Directive, bool SeparateIter = false); - void EmitOMPInnerLoop(const OMPLoopDirective &S, OMPPrivateScope &LoopScope, - bool SeparateIter = false); + void EmitOMPInnerLoop(const Stmt &S, bool RequiresCleanup, + const Expr *LoopCond, const Expr *IncExpr, + const std::function &BodyGen); void EmitOMPSimdFinal(const OMPLoopDirective &S); void EmitOMPWorksharingLoop(const OMPLoopDirective &S); void EmitOMPForOuterLoop(OpenMPScheduleClauseKind ScheduleKind, diff --git a/test/OpenMP/sections_codegen.cpp b/test/OpenMP/sections_codegen.cpp new file mode 100644 index 0000000000..9485d9ffe2 --- /dev/null +++ b/test/OpenMP/sections_codegen.cpp @@ -0,0 +1,96 @@ +// RUN: %clang_cc1 -verify -fopenmp=libiomp5 -x c++ -emit-llvm -triple x86_64-unknown-unknown -fexceptions -fcxx-exceptions -o - %s | FileCheck %s +// RUN: %clang_cc1 -fopenmp=libiomp5 -x c++ -std=c++11 -fexceptions -fcxx-exceptions -triple x86_64-unknown-unknown -emit-pch -o %t %s +// RUN: %clang_cc1 -fopenmp=libiomp5 -x c++ -std=c++11 -include-pch %t -fsyntax-only -verify %s -triple x86_64-unknown-unknown -fexceptions -fcxx-exceptions -emit-llvm -o - | FileCheck %s +// expected-no-diagnostics + +#ifndef HEADER +#define HEADER + +// CHECK-LABEL: foo +void foo() {}; +// CHECK-LABEL: bar +void bar() {}; + +template +T tmain() { +#pragma omp parallel +#pragma omp sections + { + foo(); + } + return T(); +} + +// CHECK-LABEL: @main +int main() { + float l = 0.0; // Used as a base point in checks. +// CHECK: [[GTID:%.+]] = call{{.*}} i32 @__kmpc_global_thread_num({{.*}}) +// CHECK: store float +#pragma omp sections nowait + { +// CHECK: store i32 0, i32* [[LB_PTR:%.+]], +// CHECK: store i32 1, i32* [[UB_PTR:%.+]], +// CHECK: call void @__kmpc_for_static_init_4(%{{.+}}* @{{.+}}, i32 [[GTID]], i32 34, i32* [[IS_LAST_PTR:%.+]], i32* [[LB_PTR]], i32* [[UB_PTR]], i32* [[STRIDE_PTR:%.+]], i32 1, i32 1) +// <> +// CHECK: [[UB:%.+]] = load i32, i32* [[UB_PTR]] +// CHECK: [[CMP:%.+]] = icmp slt i32 [[UB]], 1 +// CHECK: [[MIN_UB_GLOBALUB:%.+]] = select i1 [[CMP]], i32 [[UB]], i32 1 +// CHECK: store i32 [[MIN_UB_GLOBALUB]], i32* [[UB_PTR]] +// <> +// CHECK: [[LB:%.+]] = load i32, i32* [[LB_PTR]] +// CHECK: store i32 [[LB]], i32* [[IV_PTR:%.+]] +// CHECK: br label %[[INNER_FOR_COND:.+]] +// CHECK: [[INNER_FOR_COND]] +// <> +// CHECK: [[IV:%.+]] = load i32, i32* [[IV_PTR]] +// CHECK: [[UB:%.+]] = load i32, i32* [[UB_PTR]] +// CHECK: [[CMP:%.+]] = icmp sle i32 [[IV]], [[UB]] +// CHECK: br i1 [[CMP]], label %[[INNER_LOOP_BODY:.+]], label %[[INNER_LOOP_END:.+]] +// CHECK: [[INNER_LOOP_BODY]] +// <> - > +// CHECK: [[IV:%.+]] = load i32, i32* [[IV_PTR]] +// CHECK: switch i32 [[IV]], label %[[SECTIONS_EXIT:.+]] [ +// CHECK-NEXT: i32 0, label %[[SECTIONS_CASE0:.+]] +// CHECK-NEXT: i32 1, label %[[SECTIONS_CASE1:.+]] +#pragma omp section +// CHECK: [[SECTIONS_CASE0]] +// CHECK-NEXT: invoke void @{{.*}}foo{{.*}}() +// CHECK: br label %[[SECTIONS_EXIT]] + foo(); +#pragma omp section +// CHECK: [[SECTIONS_CASE1]] +// CHECK-NEXT: invoke void @{{.*}}bar{{.*}}() +// CHECK: br label %[[SECTIONS_EXIT]] + bar(); +// CHECK: [[SECTIONS_EXIT]] +// <<++IV;>> +// CHECK: [[IV:%.+]] = load i32, i32* [[IV_PTR]] +// CHECK-NEXT: [[INC:%.+]] = add nsw i32 [[IV]], 1 +// CHECK-NEXT: store i32 [[INC]], i32* [[IV_PTR]] +// CHECK-NEXT: br label %[[INNER_FOR_COND]] +// CHECK: [[INNER_LOOP_END]] + } +// CHECK: call void @__kmpc_for_static_fini(%{{.+}}* @{{.+}}, i32 [[GTID]]) +// CHECK-NOT: __kmpc_cancel_barrier + return tmain(); +} + +// CHECK-LABEL: tmain +// CHECK: call void {{.*}} @__kmpc_fork_call( +// CHECK-NOT: __kmpc_global_thread_num +// CHECK: [[RES:%.+]] = call i32 @__kmpc_single( +// CHECK-NEXT: [[BOOLRES:%.+]] = icmp ne i32 [[RES]], 0 +// CHECK-NEXT: br i1 [[BOOLRES]], label %[[THEN:.+]], label %[[END:.+]] +// CHECK: [[THEN]] +// CHECK-NEXT: invoke void @{{.*}}foo{{.*}}() +// CHECK-NEXT: unwind label %[[TERM_LPAD:.+]] +// CHECK: call void @__kmpc_end_single( +// CHECK-NEXT: br label %[[END]] +// CHECK: [[END]] +// CHECK-NEXT: call i32 @__kmpc_cancel_barrier( +// CHECK-NEXT: ret +// CHECK: [[TERM_LPAD]] +// CHECK: call void @__clang_call_terminate(i8* +// CHECK-NEXT: unreachable + +#endif