From: Alexey Bataev Date: Thu, 27 Jul 2017 13:20:36 +0000 (+0000) Subject: [OPENMP] Codegen for 'in_reduction' clause. X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=36f9c8f5c6d0c971b08cdfd132c7ce3f4a607f87;p=clang [OPENMP] Codegen for 'in_reduction' clause. Added codegen for task-based directive with in_reduction clause. ``` ``` The next code is emitted: ``` void *td; ... td = call i8* @__kmpc_task_reduction_init(); ... *priv = ( *)call i8* @__kmpc_task_reduction_get_th_data(i32 GTID, i8* td, i8* ) ``` git-svn-id: https://llvm.org/svn/llvm-project/cfe/trunk@309270 91177308-0d34-0410-b5e6-96231b3b80d8 --- diff --git a/include/clang/AST/OpenMPClause.h b/include/clang/AST/OpenMPClause.h index 87d340a019..24d51bc747 100644 --- a/include/clang/AST/OpenMPClause.h +++ b/include/clang/AST/OpenMPClause.h @@ -2212,6 +2212,17 @@ class OMPInReductionClause final return llvm::makeArrayRef(getRHSExprs().end(), varlist_size()); } + /// Set list of helper reduction taskgroup descriptors. + void setTaskgroupDescriptors(ArrayRef ReductionOps); + + /// Get the list of helper reduction taskgroup descriptors. + MutableArrayRef getTaskgroupDescriptors() { + return MutableArrayRef(getReductionOps().end(), varlist_size()); + } + ArrayRef getTaskgroupDescriptors() const { + return llvm::makeArrayRef(getReductionOps().end(), varlist_size()); + } + public: /// Creates clause with a list of variables \a VL. /// @@ -2241,6 +2252,8 @@ public: /// \endcode /// Required for proper codegen of final reduction operation performed by the /// reduction clause. + /// \param TaskgroupDescriptors List of helper taskgroup descriptors for + /// corresponding items in parent taskgroup task_reduction clause. /// \param PreInit Statement that must be executed before entering the OpenMP /// region with this clause. /// \param PostUpdate Expression that must be executed after exit from the @@ -2252,7 +2265,8 @@ public: NestedNameSpecifierLoc QualifierLoc, const DeclarationNameInfo &NameInfo, ArrayRef Privates, ArrayRef LHSExprs, ArrayRef RHSExprs, - ArrayRef ReductionOps, Stmt *PreInit, Expr *PostUpdate); + ArrayRef ReductionOps, ArrayRef TaskgroupDescriptors, + Stmt *PreInit, Expr *PostUpdate); /// Creates an empty clause with the place for \a N variables. /// @@ -2300,6 +2314,14 @@ public: return helper_expr_range(getReductionOps().begin(), getReductionOps().end()); } + helper_expr_const_range taskgroup_descriptors() const { + return helper_expr_const_range(getTaskgroupDescriptors().begin(), + getTaskgroupDescriptors().end()); + } + helper_expr_range taskgroup_descriptors() { + return helper_expr_range(getTaskgroupDescriptors().begin(), + getTaskgroupDescriptors().end()); + } child_range children() { return child_range(reinterpret_cast(varlist_begin()), diff --git a/include/clang/AST/RecursiveASTVisitor.h b/include/clang/AST/RecursiveASTVisitor.h index 0a16540e1a..b696a03285 100644 --- a/include/clang/AST/RecursiveASTVisitor.h +++ b/include/clang/AST/RecursiveASTVisitor.h @@ -3057,6 +3057,8 @@ bool RecursiveASTVisitor::VisitOMPInReductionClause( for (auto *E : C->reduction_ops()) { TRY_TO(TraverseStmt(E)); } + for (auto *E : C->taskgroup_descriptors()) + TRY_TO(TraverseStmt(E)); return true; } diff --git a/lib/AST/OpenMPClause.cpp b/lib/AST/OpenMPClause.cpp index 9dcf5c1920..497c605cae 100644 --- a/lib/AST/OpenMPClause.cpp +++ b/lib/AST/OpenMPClause.cpp @@ -593,14 +593,23 @@ void OMPInReductionClause::setReductionOps(ArrayRef ReductionOps) { std::copy(ReductionOps.begin(), ReductionOps.end(), getRHSExprs().end()); } +void OMPInReductionClause::setTaskgroupDescriptors( + ArrayRef TaskgroupDescriptors) { + assert(TaskgroupDescriptors.size() == varlist_size() && + "Number of in reduction descriptors is not the same as the " + "preallocated buffer"); + std::copy(TaskgroupDescriptors.begin(), TaskgroupDescriptors.end(), + getReductionOps().end()); +} + OMPInReductionClause *OMPInReductionClause::Create( const ASTContext &C, SourceLocation StartLoc, SourceLocation LParenLoc, SourceLocation EndLoc, SourceLocation ColonLoc, ArrayRef VL, NestedNameSpecifierLoc QualifierLoc, const DeclarationNameInfo &NameInfo, ArrayRef Privates, ArrayRef LHSExprs, - ArrayRef RHSExprs, ArrayRef ReductionOps, Stmt *PreInit, - Expr *PostUpdate) { - void *Mem = C.Allocate(totalSizeToAlloc(5 * VL.size())); + ArrayRef RHSExprs, ArrayRef ReductionOps, + ArrayRef TaskgroupDescriptors, Stmt *PreInit, Expr *PostUpdate) { + void *Mem = C.Allocate(totalSizeToAlloc(6 * VL.size())); OMPInReductionClause *Clause = new (Mem) OMPInReductionClause( StartLoc, LParenLoc, EndLoc, ColonLoc, VL.size(), QualifierLoc, NameInfo); Clause->setVarRefs(VL); @@ -608,6 +617,7 @@ OMPInReductionClause *OMPInReductionClause::Create( Clause->setLHSExprs(LHSExprs); Clause->setRHSExprs(RHSExprs); Clause->setReductionOps(ReductionOps); + Clause->setTaskgroupDescriptors(TaskgroupDescriptors); Clause->setPreInitStmt(PreInit); Clause->setPostUpdateExpr(PostUpdate); return Clause; @@ -615,7 +625,7 @@ OMPInReductionClause *OMPInReductionClause::Create( OMPInReductionClause *OMPInReductionClause::CreateEmpty(const ASTContext &C, unsigned N) { - void *Mem = C.Allocate(totalSizeToAlloc(5 * N)); + void *Mem = C.Allocate(totalSizeToAlloc(6 * N)); return new (Mem) OMPInReductionClause(N); } diff --git a/lib/AST/StmtProfile.cpp b/lib/AST/StmtProfile.cpp index 1b6c83fe30..c1ba1e8e94 100644 --- a/lib/AST/StmtProfile.cpp +++ b/lib/AST/StmtProfile.cpp @@ -596,6 +596,10 @@ void OMPClauseProfiler::VisitOMPInReductionClause( if (E) Profiler->VisitStmt(E); } + for (auto *E : C->taskgroup_descriptors()) { + if (E) + Profiler->VisitStmt(E); + } } void OMPClauseProfiler::VisitOMPLinearClause(const OMPLinearClause *C) { VisitOMPClauseList(C); diff --git a/lib/CodeGen/CGStmtOpenMP.cpp b/lib/CodeGen/CGStmtOpenMP.cpp index 130db5f2c6..29b63a85ec 100644 --- a/lib/CodeGen/CGStmtOpenMP.cpp +++ b/lib/CodeGen/CGStmtOpenMP.cpp @@ -2805,7 +2805,57 @@ void CodeGenFunction::EmitOMPTaskBasedDirective(const OMPExecutableDirective &S, RedCG, Cnt); } } + // Privatize all private variables except for in_reduction items. (void)Scope.Privatize(); + SmallVector InRedVars; + SmallVector InRedPrivs; + SmallVector InRedOps; + SmallVector TaskgroupDescriptors; + for (const auto *C : S.getClausesOfKind()) { + auto IPriv = C->privates().begin(); + auto IRed = C->reduction_ops().begin(); + auto ITD = C->taskgroup_descriptors().begin(); + for (const auto *Ref : C->varlists()) { + InRedVars.emplace_back(Ref); + InRedPrivs.emplace_back(*IPriv); + InRedOps.emplace_back(*IRed); + TaskgroupDescriptors.emplace_back(*ITD); + std::advance(IPriv, 1); + std::advance(IRed, 1); + std::advance(ITD, 1); + } + } + // Privatize in_reduction items here, because taskgroup descriptors must be + // privatized earlier. + OMPPrivateScope InRedScope(CGF); + if (!InRedVars.empty()) { + ReductionCodeGen RedCG(InRedVars, InRedPrivs, InRedOps); + for (unsigned Cnt = 0, E = InRedVars.size(); Cnt < E; ++Cnt) { + RedCG.emitSharedLValue(CGF, Cnt); + RedCG.emitAggregateType(CGF, Cnt); + // The taskgroup descriptor variable is always implicit firstprivate and + // privatized already during procoessing of the firstprivates. + llvm::Value *ReductionsPtr = CGF.EmitLoadOfScalar( + CGF.EmitLValue(TaskgroupDescriptors[Cnt]), SourceLocation()); + Address Replacement = CGF.CGM.getOpenMPRuntime().getTaskReductionItem( + CGF, S.getLocStart(), ReductionsPtr, RedCG.getSharedLValue(Cnt)); + Replacement = Address( + CGF.EmitScalarConversion( + Replacement.getPointer(), CGF.getContext().VoidPtrTy, + CGF.getContext().getPointerType(InRedPrivs[Cnt]->getType()), + SourceLocation()), + Replacement.getAlignment()); + Replacement = RedCG.adjustPrivateAddress(CGF, Cnt, Replacement); + InRedScope.addPrivate(RedCG.getBaseDecl(Cnt), + [Replacement]() { return Replacement; }); + // FIXME: This must removed once the runtime library is fixed. + // Emit required threadprivate variables for + // initilizer/combiner/finalizer. + CGF.CGM.getOpenMPRuntime().emitTaskReductionFixups(CGF, S.getLocStart(), + RedCG, Cnt); + } + } + (void)InRedScope.Privatize(); Action.Enter(CGF); BodyGen(CGF); diff --git a/lib/Sema/SemaOpenMP.cpp b/lib/Sema/SemaOpenMP.cpp index d3b0f46e90..4f1cd5c245 100644 --- a/lib/Sema/SemaOpenMP.cpp +++ b/lib/Sema/SemaOpenMP.cpp @@ -255,11 +255,13 @@ public: /// Returns the location and reduction operation from the innermost parent /// region for the given \p D. DSAVarData getTopMostTaskgroupReductionData(ValueDecl *D, SourceRange &SR, - BinaryOperatorKind &BOK); + BinaryOperatorKind &BOK, + Expr *&TaskgroupDescriptor); /// Returns the location and reduction operation from the innermost parent /// region for the given \p D. DSAVarData getTopMostTaskgroupReductionData(ValueDecl *D, SourceRange &SR, - const Expr *&ReductionRef); + const Expr *&ReductionRef, + Expr *&TaskgroupDescriptor); /// Return reduction reference expression for the current taskgroup. Expr *getTaskgroupReductionRef() const { assert(Stack.back().first.back().Directive == OMPD_taskgroup && @@ -267,6 +269,13 @@ public: "directive."); return Stack.back().first.back().TaskgroupReductionRef; } + /// Checks if the given \p VD declaration is actually a taskgroup reduction + /// descriptor variable at the \p Level of OpenMP regions. + bool isTaskgroupReductionRef(ValueDecl *VD, unsigned Level) const { + return Stack.back().first[Level].TaskgroupReductionRef && + cast(Stack.back().first[Level].TaskgroupReductionRef) + ->getDecl() == VD; + } /// \brief Returns data sharing attributes from top of the stack for the /// specified declaration. @@ -831,7 +840,8 @@ void DSAStackTy::addTaskgroupReductionData(ValueDecl *D, SourceRange SR, DSAStackTy::DSAVarData DSAStackTy::getTopMostTaskgroupReductionData(ValueDecl *D, SourceRange &SR, - BinaryOperatorKind &BOK) { + BinaryOperatorKind &BOK, + Expr *&TaskgroupDescriptor) { D = getCanonicalDecl(D); assert(!isStackEmpty() && "Data-sharing attributes stack is empty."); if (Stack.back().first.empty()) @@ -848,6 +858,10 @@ DSAStackTy::getTopMostTaskgroupReductionData(ValueDecl *D, SourceRange &SR, return DSAVarData(); SR = ReductionData.ReductionRange; BOK = ReductionData.ReductionOp.get(); + assert(I->TaskgroupReductionRef && "taskgroup reduction reference " + "expression for the descriptor is not " + "set."); + TaskgroupDescriptor = I->TaskgroupReductionRef; return DSAVarData(OMPD_taskgroup, OMPC_reduction, Data.RefExpr.getPointer(), Data.PrivateCopy, I->DefaultAttrLoc); } @@ -856,7 +870,8 @@ DSAStackTy::getTopMostTaskgroupReductionData(ValueDecl *D, SourceRange &SR, DSAStackTy::DSAVarData DSAStackTy::getTopMostTaskgroupReductionData(ValueDecl *D, SourceRange &SR, - const Expr *&ReductionRef) { + const Expr *&ReductionRef, + Expr *&TaskgroupDescriptor) { D = getCanonicalDecl(D); assert(!isStackEmpty() && "Data-sharing attributes stack is empty."); if (Stack.back().first.empty()) @@ -873,6 +888,10 @@ DSAStackTy::getTopMostTaskgroupReductionData(ValueDecl *D, SourceRange &SR, return DSAVarData(); SR = ReductionData.ReductionRange; ReductionRef = ReductionData.ReductionOp.get(); + assert(I->TaskgroupReductionRef && "taskgroup reduction reference " + "expression for the descriptor is not " + "set."); + TaskgroupDescriptor = I->TaskgroupReductionRef; return DSAVarData(OMPD_taskgroup, OMPC_reduction, Data.RefExpr.getPointer(), Data.PrivateCopy, I->DefaultAttrLoc); } @@ -1298,7 +1317,14 @@ VarDecl *Sema::IsOpenMPCapturedDecl(ValueDecl *D) { bool Sema::isOpenMPPrivateDecl(ValueDecl *D, unsigned Level) { assert(LangOpts.OpenMP && "OpenMP is not allowed"); return DSAStack->hasExplicitDSA( - D, [](OpenMPClauseKind K) -> bool { return K == OMPC_private; }, Level); + D, [](OpenMPClauseKind K) -> bool { return K == OMPC_private; }, + Level) || + // Consider taskgroup reduction descriptor variable a private to avoid + // possible capture in the region. + (DSAStack->hasExplicitDirective( + [](OpenMPDirectiveKind K) { return K == OMPD_taskgroup; }, + Level) && + DSAStack->isTaskgroupReductionRef(D, Level)); } bool Sema::isOpenMPTargetCapturedDecl(ValueDecl *D, unsigned Level) { @@ -2137,6 +2163,15 @@ StmtResult Sema::ActOnOpenMPRegionEnd(StmtResult S, SmallVector PICs; // This is required for proper codegen. for (auto *Clause : Clauses) { + if (isOpenMPTaskingDirective(DSAStack->getCurrentDirective()) && + Clause->getClauseKind() == OMPC_in_reduction) { + // Capture taskgroup task_reduction descriptors inside the tasking regions + // with the corresponding in_reduction items. + auto *IRC = cast(Clause); + for (auto *E : IRC->taskgroup_descriptors()) + if (E) + MarkDeclarationsReferencedInExpr(E); + } if (isOpenMPPrivate(Clause->getClauseKind()) || Clause->getClauseKind() == OMPC_copyprivate || (getLangOpts().OpenMPUseTLS && @@ -2567,13 +2602,24 @@ StmtResult Sema::ActOnOpenMPExecutableDirective( // Generate list of implicitly defined firstprivate variables. VarsWithInheritedDSA = DSAChecker.getVarsWithInheritedDSA(); - if (!DSAChecker.getImplicitFirstprivate().empty()) { + SmallVector ImplicitFirstprivates( + DSAChecker.getImplicitFirstprivate().begin(), + DSAChecker.getImplicitFirstprivate().end()); + // Mark taskgroup task_reduction descriptors as implicitly firstprivate. + for (auto *C : Clauses) { + if (auto *IRC = dyn_cast(C)) { + for (auto *E : IRC->taskgroup_descriptors()) + if (E) + ImplicitFirstprivates.emplace_back(E); + } + } + if (!ImplicitFirstprivates.empty()) { if (OMPClause *Implicit = ActOnOpenMPFirstprivateClause( - DSAChecker.getImplicitFirstprivate(), SourceLocation(), - SourceLocation(), SourceLocation())) { + ImplicitFirstprivates, SourceLocation(), SourceLocation(), + SourceLocation())) { ClausesWithImplicit.push_back(Implicit); ErrorFound = cast(Implicit)->varlist_size() != - DSAChecker.getImplicitFirstprivate().size(); + ImplicitFirstprivates.size(); } else ErrorFound = true; } @@ -9047,6 +9093,9 @@ struct ReductionData { SmallVector RHSs; /// Reduction operation expression. SmallVector ReductionOps; + /// Taskgroup descriptors for the corresponding reduction items in + /// in_reduction clauses. + SmallVector TaskgroupDescriptors; /// List of captures for clause. SmallVector ExprCaptures; /// List of postupdate expressions. @@ -9059,6 +9108,7 @@ struct ReductionData { LHSs.reserve(Size); RHSs.reserve(Size); ReductionOps.reserve(Size); + TaskgroupDescriptors.reserve(Size); ExprCaptures.reserve(Size); ExprPostUpdates.reserve(Size); } @@ -9070,15 +9120,17 @@ struct ReductionData { LHSs.emplace_back(nullptr); RHSs.emplace_back(nullptr); ReductionOps.emplace_back(ReductionOp); + TaskgroupDescriptors.emplace_back(nullptr); } /// Stores reduction data. - void push(Expr *Item, Expr *Private, Expr *LHS, Expr *RHS, - Expr *ReductionOp) { + void push(Expr *Item, Expr *Private, Expr *LHS, Expr *RHS, Expr *ReductionOp, + Expr *TaskgroupDescriptor) { Vars.emplace_back(Item); Privates.emplace_back(Private); LHSs.emplace_back(LHS); RHSs.emplace_back(RHS); ReductionOps.emplace_back(ReductionOp); + TaskgroupDescriptors.emplace_back(TaskgroupDescriptor); } }; } // namespace @@ -9217,6 +9269,7 @@ static bool ActOnOMPReductionKindClause( if (!D) continue; + Expr *TaskgroupDescriptor = nullptr; QualType Type; auto *ASE = dyn_cast(RefExpr->IgnoreParens()); auto *OASE = dyn_cast(RefExpr->IgnoreParens()); @@ -9593,11 +9646,13 @@ static bool ActOnOMPReductionKindClause( SourceRange ParentSR; BinaryOperatorKind ParentBOK; const Expr *ParentReductionOp; + Expr *ParentBOKTD, *ParentReductionOpTD; DSAStackTy::DSAVarData ParentBOKDSA = - Stack->getTopMostTaskgroupReductionData(D, ParentSR, ParentBOK); + Stack->getTopMostTaskgroupReductionData(D, ParentSR, ParentBOK, + ParentBOKTD); DSAStackTy::DSAVarData ParentReductionOpDSA = - Stack->getTopMostTaskgroupReductionData(D, ParentSR, - ParentReductionOp); + Stack->getTopMostTaskgroupReductionData( + D, ParentSR, ParentReductionOp, ParentReductionOpTD); bool IsParentBOK = ParentBOKDSA.DKind != OMPD_unknown; bool IsParentReductionOp = ParentReductionOpDSA.DKind != OMPD_unknown; if (!IsParentBOK && !IsParentReductionOp) { @@ -9628,6 +9683,8 @@ static bool ActOnOMPReductionKindClause( continue; } } + TaskgroupDescriptor = IsParentBOK ? ParentBOKTD : ParentReductionOpTD; + assert(TaskgroupDescriptor && "Taskgroup descriptor must be defined."); } DeclRefExpr *Ref = nullptr; @@ -9674,7 +9731,8 @@ static bool ActOnOMPReductionKindClause( else Stack->addTaskgroupReductionData(D, ReductionIdRange, BOK); } - RD.push(VarsExpr, PrivateDRE, LHSDRE, RHSDRE, ReductionOp.get()); + RD.push(VarsExpr, PrivateDRE, LHSDRE, RHSDRE, ReductionOp.get(), + TaskgroupDescriptor); } return RD.Vars.empty(); } @@ -9737,7 +9795,7 @@ OMPClause *Sema::ActOnOpenMPInReductionClause( return OMPInReductionClause::Create( Context, StartLoc, LParenLoc, ColonLoc, EndLoc, RD.Vars, ReductionIdScopeSpec.getWithLocInContext(Context), ReductionId, - RD.Privates, RD.LHSs, RD.RHSs, RD.ReductionOps, + RD.Privates, RD.LHSs, RD.RHSs, RD.ReductionOps, RD.TaskgroupDescriptors, buildPreInits(Context, RD.ExprCaptures), buildPostUpdate(*this, RD.ExprPostUpdates)); } diff --git a/lib/Serialization/ASTReaderStmt.cpp b/lib/Serialization/ASTReaderStmt.cpp index 0eeb9c9d25..bff8f9d05f 100644 --- a/lib/Serialization/ASTReaderStmt.cpp +++ b/lib/Serialization/ASTReaderStmt.cpp @@ -2227,6 +2227,10 @@ void OMPClauseReader::VisitOMPInReductionClause(OMPInReductionClause *C) { for (unsigned I = 0; I != NumVars; ++I) Vars.push_back(Reader->Record.readSubExpr()); C->setReductionOps(Vars); + Vars.clear(); + for (unsigned I = 0; I != NumVars; ++I) + Vars.push_back(Reader->Record.readSubExpr()); + C->setTaskgroupDescriptors(Vars); } void OMPClauseReader::VisitOMPLinearClause(OMPLinearClause *C) { diff --git a/lib/Serialization/ASTWriterStmt.cpp b/lib/Serialization/ASTWriterStmt.cpp index 04d56a7859..1bc64b6a17 100644 --- a/lib/Serialization/ASTWriterStmt.cpp +++ b/lib/Serialization/ASTWriterStmt.cpp @@ -2018,6 +2018,8 @@ void OMPClauseWriter::VisitOMPInReductionClause(OMPInReductionClause *C) { Record.AddStmt(E); for (auto *E : C->reduction_ops()) Record.AddStmt(E); + for (auto *E : C->taskgroup_descriptors()) + Record.AddStmt(E); } void OMPClauseWriter::VisitOMPLinearClause(OMPLinearClause *C) { diff --git a/test/OpenMP/task_in_reduction_codegen.cpp b/test/OpenMP/task_in_reduction_codegen.cpp new file mode 100644 index 0000000000..2ad013a14a --- /dev/null +++ b/test/OpenMP/task_in_reduction_codegen.cpp @@ -0,0 +1,81 @@ +// RUN: %clang_cc1 -verify -triple x86_64-apple-darwin10 -fopenmp -x c++ -emit-llvm %s -o - | FileCheck %s +// RUN: %clang_cc1 -fopenmp -x c++ -triple x86_64-apple-darwin10 -emit-pch -o %t %s +// RUN: %clang_cc1 -fopenmp -x c++ -triple x86_64-apple-darwin10 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s +// expected-no-diagnostics +#ifndef HEADER +#define HEADER + +// CHECK: [[PRIVATES:%.+]] = type { i8*, i8* } + +struct S { + int a; + S() : a(0) {} + S(const S&) {} + S& operator=(const S&) {return *this;} + ~S() {} + friend S operator+(const S&a, const S&b) {return a;} +}; + + +int main(int argc, char **argv) { + int a; + float b; + S c[5]; + short d[argc]; +#pragma omp taskgroup task_reduction(+: a, b, argc) + { +#pragma omp taskgroup task_reduction(-:c, d) +#pragma omp parallel +#pragma omp task in_reduction(+:a) in_reduction(-:d) + a += d[a]; + } + return 0; +} + +// CHECK-LABEL: @main +// CHECK: void @__kmpc_taskgroup(%ident_t* @0, i32 [[GTID:%.+]]) +// CHECK: [[TD1:%.+]] = call i8* @__kmpc_task_reduction_init(i32 [[GTID]], i32 3, i8* % +// CHECK-NEXT: store i8* [[TD1]], i8** [[TD1_ADDR:%[^,]+]], +// CHECK-NEXT: call void @__kmpc_taskgroup(%ident_t* @0, i32 [[GTID]]) +// CHECK: [[TD2:%.+]] = call i8* @__kmpc_task_reduction_init(i32 [[GTID]], i32 2, i8* % +// CHECK-NEXT: store i8* [[TD2]], i8** [[TD2_ADDR:%[^,]+]], +// CHECK-NEXT: call void (%ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%ident_t* @0, i32 5, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i64, i16*, i8**, i8**)* [[OMP_PARALLEL:@.+]] to void (i32*, i32*, ...)*), i32* %{{.+}}, i64 %{{.+}}, i16* %{{.+}}, i8** [[TD1_ADDR]], i8** [[TD2_ADDR]]) +// CHECK-NEXT: call void @__kmpc_end_taskgroup(%ident_t* @0, i32 [[GTID]]) +// CHECK-NEXT: call void @__kmpc_end_taskgroup(%ident_t* @0, i32 [[GTID]]) + +// CHECK: define internal void [[OMP_PARALLEL]]( +// CHECK: [[TASK_T:%.+]] = call i8* @__kmpc_omp_task_alloc(%ident_t* @0, i32 [[GTID:%.+]], i32 1, i64 56, i64 40, i32 (i32, i8*)* bitcast (i32 (i32, [[T:%.+]]*)* [[OMP_TASK:@.+]] to i32 (i32, i8*)*)) +// CHECK-NEXT: [[TASK_T_WITH_PRIVS:%.+]] = bitcast i8* [[TASK_T]] to [[T]]* +// CHECK: [[PRIVS:%.+]] = getelementptr inbounds [[T]], [[T]]* [[TASK_T_WITH_PRIVS]], i32 0, i32 1 +// CHECK: [[TD1_REF:%.+]] = getelementptr inbounds [[PRIVATES]], [[PRIVATES]]* [[PRIVS]], i32 0, i32 0 +// CHECK-NEXT: [[TD1_SHAR:%.+]] = getelementptr inbounds % +// CHECK-NEXT: [[TD1_ADDR:%.+]] = load i8**, i8*** [[TD1_SHAR]], +// CHECK-NEXT: [[TD1:%.+]] = load i8*, i8** [[TD1_ADDR]], +// CHECK-NEXT: store i8* [[TD1]], i8** [[TD1_REF]], +// CHECK-NEXT: [[TD2_REF:%.+]] = getelementptr inbounds [[PRIVATES]], [[PRIVATES]]* [[PRIVS]], i32 0, i32 1 +// CHECK-NEXT: [[TD2_SHAR:%.+]] = getelementptr inbounds % +// CHECK-NEXT: [[TD2_ADDR:%.+]] = load i8**, i8*** [[TD2_SHAR]], +// CHECK-NEXT: [[TD2:%.+]] = load i8*, i8** [[TD2_ADDR]], +// CHECK-NEXT: store i8* [[TD2]], i8** [[TD2_REF]], +// CHECK-NEXT: call i32 @__kmpc_omp_task(%ident_t* @0, i32 [[GTID]], i8* [[TASK_T]]) +// CHECK-NEXT: ret void +// CHECK-NEXT: } + +// CHECK: define internal {{.*}} [[OMP_TASK]]( +// CHECK: call void (i8*, ...) %{{[^(]+}}(i8* %{{.+}}, i8*** [[TD1_REF:%[^,]+]], i8*** [[TD2_REF:%[^,]+]]) +// CHECK-NEXT: [[TD1_ADDR:%.+]] = load i8**, i8*** [[TD1_REF]], +// CHECK-NEXT: [[TD2_ADDR:%.+]] = load i8**, i8*** [[TD2_REF]], +// CHECK-NEXT: [[A_REF:%.+]] = getelementptr inbounds % +// CHECK-NEXT: [[A_ADDR:%.+]] = load i32*, i32** [[A_REF]], +// CHECK-NEXT: [[TD1:%.+]] = load i8*, i8** [[TD1_ADDR]], +// CHECK-NEXT: [[GTID:%.+]] = load i32, i32* % +// CHECK-NEXT: [[A_PTR:%.+]] = bitcast i32* [[A_ADDR]] to i8* +// CHECK-NEXT: call i8* @__kmpc_task_reduction_get_th_data(i32 [[GTID]], i8* [[TD1]], i8* [[A_PTR]]) +// CHECK: [[D_REF:%.+]] = getelementptr inbounds % +// CHECK-NEXT: [[D_ADDR:%.+]] = load i16*, i16** [[D_REF]], +// CHECK: [[TD2:%.+]] = load i8*, i8** [[TD2_ADDR]], +// CHECK-NEXT: [[D_PTR:%.+]] = bitcast i16* [[D_ADDR]] to i8* +// CHECK-NEXT: call i8* @__kmpc_task_reduction_get_th_data(i32 [[GTID]], i8* [[TD2]], i8* [[D_PTR]]) +// CHECK: add nsw i32 +// CHECK: store i32 % +#endif diff --git a/test/OpenMP/taskloop_in_reduction_codegen.cpp b/test/OpenMP/taskloop_in_reduction_codegen.cpp new file mode 100644 index 0000000000..e9ee9a32b9 --- /dev/null +++ b/test/OpenMP/taskloop_in_reduction_codegen.cpp @@ -0,0 +1,82 @@ +// RUN: %clang_cc1 -verify -triple x86_64-apple-darwin10 -fopenmp -x c++ -emit-llvm %s -o - | FileCheck %s +// RUN: %clang_cc1 -fopenmp -x c++ -triple x86_64-apple-darwin10 -emit-pch -o %t %s +// RUN: %clang_cc1 -fopenmp -x c++ -triple x86_64-apple-darwin10 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s +// expected-no-diagnostics +#ifndef HEADER +#define HEADER + +// CHECK: [[PRIVATES:%.+]] = type { i8*, i8* } + +struct S { + int a; + S() : a(0) {} + S(const S&) {} + S& operator=(const S&) {return *this;} + ~S() {} + friend S operator+(const S&a, const S&b) {return a;} +}; + + +int main(int argc, char **argv) { + int a; + float b; + S c[5]; + short d[argc]; +#pragma omp taskgroup task_reduction(+: a, b, argc) + { +#pragma omp taskgroup task_reduction(-:c, d) +#pragma omp parallel +#pragma omp taskloop in_reduction(+:a) in_reduction(-:d) + for (int i = 0; i < 5; ++i) + a += d[a]; + } + return 0; +} + +// CHECK-LABEL: @main +// CHECK: void @__kmpc_taskgroup(%ident_t* @0, i32 [[GTID:%.+]]) +// CHECK: [[TD1:%.+]] = call i8* @__kmpc_task_reduction_init(i32 [[GTID]], i32 3, i8* % +// CHECK-NEXT: store i8* [[TD1]], i8** [[TD1_ADDR:%[^,]+]], +// CHECK-NEXT: call void @__kmpc_taskgroup(%ident_t* @0, i32 [[GTID]]) +// CHECK: [[TD2:%.+]] = call i8* @__kmpc_task_reduction_init(i32 [[GTID]], i32 2, i8* % +// CHECK-NEXT: store i8* [[TD2]], i8** [[TD2_ADDR:%[^,]+]], +// CHECK-NEXT: call void (%ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%ident_t* @0, i32 5, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i64, i16*, i8**, i8**)* [[OMP_PARALLEL:@.+]] to void (i32*, i32*, ...)*), i32* %{{.+}}, i64 %{{.+}}, i16* %{{.+}}, i8** [[TD1_ADDR]], i8** [[TD2_ADDR]]) +// CHECK-NEXT: call void @__kmpc_end_taskgroup(%ident_t* @0, i32 [[GTID]]) +// CHECK-NEXT: call void @__kmpc_end_taskgroup(%ident_t* @0, i32 [[GTID]]) + +// CHECK: define internal void [[OMP_PARALLEL]]( +// CHECK: [[TASK_T:%.+]] = call i8* @__kmpc_omp_task_alloc(%ident_t* @0, i32 [[GTID:%.+]], i32 1, i64 96, i64 40, i32 (i32, i8*)* bitcast (i32 (i32, [[T:%.+]]*)* [[OMP_TASK:@.+]] to i32 (i32, i8*)*)) +// CHECK-NEXT: [[TASK_T_WITH_PRIVS:%.+]] = bitcast i8* [[TASK_T]] to [[T]]* +// CHECK: [[PRIVS:%.+]] = getelementptr inbounds [[T]], [[T]]* [[TASK_T_WITH_PRIVS]], i32 0, i32 1 +// CHECK: [[TD1_REF:%.+]] = getelementptr inbounds [[PRIVATES]], [[PRIVATES]]* [[PRIVS]], i32 0, i32 0 +// CHECK-NEXT: [[TD1_SHAR:%.+]] = getelementptr inbounds % +// CHECK-NEXT: [[TD1_ADDR:%.+]] = load i8**, i8*** [[TD1_SHAR]], +// CHECK-NEXT: [[TD1:%.+]] = load i8*, i8** [[TD1_ADDR]], +// CHECK-NEXT: store i8* [[TD1]], i8** [[TD1_REF]], +// CHECK-NEXT: [[TD2_REF:%.+]] = getelementptr inbounds [[PRIVATES]], [[PRIVATES]]* [[PRIVS]], i32 0, i32 1 +// CHECK-NEXT: [[TD2_SHAR:%.+]] = getelementptr inbounds % +// CHECK-NEXT: [[TD2_ADDR:%.+]] = load i8**, i8*** [[TD2_SHAR]], +// CHECK-NEXT: [[TD2:%.+]] = load i8*, i8** [[TD2_ADDR]], +// CHECK-NEXT: store i8* [[TD2]], i8** [[TD2_REF]], +// CHECK: call void @__kmpc_taskloop(%ident_t* @0, i32 [[GTID]], i8* [[TASK_T]], i32 1, +// CHECK: ret void +// CHECK-NEXT: } + +// CHECK: define internal {{.*}} [[OMP_TASK]]( +// CHECK: call void (i8*, ...) %{{[^(]+}}(i8* %{{.+}}, i8*** [[TD1_REF:%[^,]+]], i8*** [[TD2_REF:%[^,]+]]) +// CHECK-NEXT: [[TD1_ADDR:%.+]] = load i8**, i8*** [[TD1_REF]], +// CHECK-NEXT: [[TD2_ADDR:%.+]] = load i8**, i8*** [[TD2_REF]], +// CHECK-NEXT: [[A_REF:%.+]] = getelementptr inbounds % +// CHECK-NEXT: [[A_ADDR:%.+]] = load i32*, i32** [[A_REF]], +// CHECK-NEXT: [[TD1:%.+]] = load i8*, i8** [[TD1_ADDR]], +// CHECK-NEXT: [[GTID:%.+]] = load i32, i32* % +// CHECK-NEXT: [[A_PTR:%.+]] = bitcast i32* [[A_ADDR]] to i8* +// CHECK-NEXT: call i8* @__kmpc_task_reduction_get_th_data(i32 [[GTID]], i8* [[TD1]], i8* [[A_PTR]]) +// CHECK: [[D_REF:%.+]] = getelementptr inbounds % +// CHECK-NEXT: [[D_ADDR:%.+]] = load i16*, i16** [[D_REF]], +// CHECK: [[TD2:%.+]] = load i8*, i8** [[TD2_ADDR]], +// CHECK-NEXT: [[D_PTR:%.+]] = bitcast i16* [[D_ADDR]] to i8* +// CHECK-NEXT: call i8* @__kmpc_task_reduction_get_th_data(i32 [[GTID]], i8* [[TD2]], i8* [[D_PTR]]) +// CHECK: add nsw i32 +// CHECK: store i32 % +#endif diff --git a/test/OpenMP/taskloop_simd_in_reduction_codegen.cpp b/test/OpenMP/taskloop_simd_in_reduction_codegen.cpp new file mode 100644 index 0000000000..d894943cc3 --- /dev/null +++ b/test/OpenMP/taskloop_simd_in_reduction_codegen.cpp @@ -0,0 +1,82 @@ +// RUN: %clang_cc1 -verify -triple x86_64-apple-darwin10 -fopenmp -x c++ -emit-llvm %s -o - | FileCheck %s +// RUN: %clang_cc1 -fopenmp -x c++ -triple x86_64-apple-darwin10 -emit-pch -o %t %s +// RUN: %clang_cc1 -fopenmp -x c++ -triple x86_64-apple-darwin10 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s +// expected-no-diagnostics +#ifndef HEADER +#define HEADER + +// CHECK: [[PRIVATES:%.+]] = type { i8*, i8* } + +struct S { + int a; + S() : a(0) {} + S(const S&) {} + S& operator=(const S&) {return *this;} + ~S() {} + friend S operator+(const S&a, const S&b) {return a;} +}; + + +int main(int argc, char **argv) { + int a; + float b; + S c[5]; + short d[argc]; +#pragma omp taskgroup task_reduction(+: a, b, argc) + { +#pragma omp taskgroup task_reduction(-:c, d) +#pragma omp parallel +#pragma omp taskloop simd in_reduction(+:a) in_reduction(-:d) + for (int i = 0; i < 5; ++i) + a += d[a]; + } + return 0; +} + +// CHECK-LABEL: @main +// CHECK: void @__kmpc_taskgroup(%ident_t* @0, i32 [[GTID:%.+]]) +// CHECK: [[TD1:%.+]] = call i8* @__kmpc_task_reduction_init(i32 [[GTID]], i32 3, i8* % +// CHECK-NEXT: store i8* [[TD1]], i8** [[TD1_ADDR:%[^,]+]], +// CHECK-NEXT: call void @__kmpc_taskgroup(%ident_t* @0, i32 [[GTID]]) +// CHECK: [[TD2:%.+]] = call i8* @__kmpc_task_reduction_init(i32 [[GTID]], i32 2, i8* % +// CHECK-NEXT: store i8* [[TD2]], i8** [[TD2_ADDR:%[^,]+]], +// CHECK-NEXT: call void (%ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%ident_t* @0, i32 5, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i64, i16*, i8**, i8**)* [[OMP_PARALLEL:@.+]] to void (i32*, i32*, ...)*), i32* %{{.+}}, i64 %{{.+}}, i16* %{{.+}}, i8** [[TD1_ADDR]], i8** [[TD2_ADDR]]) +// CHECK-NEXT: call void @__kmpc_end_taskgroup(%ident_t* @0, i32 [[GTID]]) +// CHECK-NEXT: call void @__kmpc_end_taskgroup(%ident_t* @0, i32 [[GTID]]) + +// CHECK: define internal void [[OMP_PARALLEL]]( +// CHECK: [[TASK_T:%.+]] = call i8* @__kmpc_omp_task_alloc(%ident_t* @0, i32 [[GTID:%.+]], i32 1, i64 96, i64 40, i32 (i32, i8*)* bitcast (i32 (i32, [[T:%.+]]*)* [[OMP_TASK:@.+]] to i32 (i32, i8*)*)) +// CHECK-NEXT: [[TASK_T_WITH_PRIVS:%.+]] = bitcast i8* [[TASK_T]] to [[T]]* +// CHECK: [[PRIVS:%.+]] = getelementptr inbounds [[T]], [[T]]* [[TASK_T_WITH_PRIVS]], i32 0, i32 1 +// CHECK: [[TD1_REF:%.+]] = getelementptr inbounds [[PRIVATES]], [[PRIVATES]]* [[PRIVS]], i32 0, i32 0 +// CHECK-NEXT: [[TD1_SHAR:%.+]] = getelementptr inbounds % +// CHECK-NEXT: [[TD1_ADDR:%.+]] = load i8**, i8*** [[TD1_SHAR]], +// CHECK-NEXT: [[TD1:%.+]] = load i8*, i8** [[TD1_ADDR]], +// CHECK-NEXT: store i8* [[TD1]], i8** [[TD1_REF]], +// CHECK-NEXT: [[TD2_REF:%.+]] = getelementptr inbounds [[PRIVATES]], [[PRIVATES]]* [[PRIVS]], i32 0, i32 1 +// CHECK-NEXT: [[TD2_SHAR:%.+]] = getelementptr inbounds % +// CHECK-NEXT: [[TD2_ADDR:%.+]] = load i8**, i8*** [[TD2_SHAR]], +// CHECK-NEXT: [[TD2:%.+]] = load i8*, i8** [[TD2_ADDR]], +// CHECK-NEXT: store i8* [[TD2]], i8** [[TD2_REF]], +// CHECK: call void @__kmpc_taskloop(%ident_t* @0, i32 [[GTID]], i8* [[TASK_T]], i32 1, +// CHECK: ret void +// CHECK-NEXT: } + +// CHECK: define internal {{.*}} [[OMP_TASK]]( +// CHECK: call void (i8*, ...) %{{[^(]+}}(i8* %{{.+}}, i8*** [[TD1_REF:%[^,]+]], i8*** [[TD2_REF:%[^,]+]]) +// CHECK-NEXT: [[TD1_ADDR:%.+]] = load i8**, i8*** [[TD1_REF]], +// CHECK-NEXT: [[TD2_ADDR:%.+]] = load i8**, i8*** [[TD2_REF]], +// CHECK-NEXT: [[A_REF:%.+]] = getelementptr inbounds % +// CHECK-NEXT: [[A_ADDR:%.+]] = load i32*, i32** [[A_REF]], +// CHECK-NEXT: [[TD1:%.+]] = load i8*, i8** [[TD1_ADDR]], +// CHECK-NEXT: [[GTID:%.+]] = load i32, i32* % +// CHECK-NEXT: [[A_PTR:%.+]] = bitcast i32* [[A_ADDR]] to i8* +// CHECK-NEXT: call i8* @__kmpc_task_reduction_get_th_data(i32 [[GTID]], i8* [[TD1]], i8* [[A_PTR]]) +// CHECK: [[D_REF:%.+]] = getelementptr inbounds % +// CHECK-NEXT: [[D_ADDR:%.+]] = load i16*, i16** [[D_REF]], +// CHECK: [[TD2:%.+]] = load i8*, i8** [[TD2_ADDR]], +// CHECK-NEXT: [[D_PTR:%.+]] = bitcast i16* [[D_ADDR]] to i8* +// CHECK-NEXT: call i8* @__kmpc_task_reduction_get_th_data(i32 [[GTID]], i8* [[TD2]], i8* [[D_PTR]]) +// CHECK: add nsw i32 +// CHECK: store i32 % +#endif diff --git a/tools/libclang/CIndex.cpp b/tools/libclang/CIndex.cpp index 7a64de2ad3..1815fdf5bd 100644 --- a/tools/libclang/CIndex.cpp +++ b/tools/libclang/CIndex.cpp @@ -2297,6 +2297,8 @@ void OMPClauseEnqueue::VisitOMPInReductionClause( for (auto *E : C->reduction_ops()) { Visitor->AddStmt(E); } + for (auto *E : C->taskgroup_descriptors()) + Visitor->AddStmt(E); } void OMPClauseEnqueue::VisitOMPLinearClause(const OMPLinearClause *C) { VisitOMPClauseList(C);