From 3d04434c638e0e091d0cd49a44cd21c264d57ae0 Mon Sep 17 00:00:00 2001 From: Alexey Bataev Date: Thu, 8 Oct 2015 09:10:53 +0000 Subject: [PATCH] [OPENMP 4.1] Codegen for array sections/subscripts in 'reduction' clause. OpenMP 4.1 adds support for array sections/subscripts in 'reduction' clause. Patch adds codegen for this feature. git-svn-id: https://llvm.org/svn/llvm-project/cfe/trunk@249672 91177308-0d34-0410-b5e6-96231b3b80d8 --- include/clang/AST/DataRecursiveASTVisitor.h | 3 + include/clang/AST/OpenMPClause.h | 30 +- include/clang/AST/RecursiveASTVisitor.h | 3 + include/clang/Basic/DiagnosticSemaKinds.td | 2 + lib/AST/Expr.cpp | 4 + lib/AST/OpenMPClause.cpp | 17 +- lib/AST/StmtProfile.cpp | 3 + lib/CodeGen/CGOpenMPRuntime.cpp | 383 ++++++++++++++------ lib/CodeGen/CGOpenMPRuntime.h | 2 + lib/CodeGen/CGStmtOpenMP.cpp | 229 ++++++++++-- lib/Sema/SemaOpenMP.cpp | 143 +++++--- lib/Serialization/ASTReaderStmt.cpp | 4 + lib/Serialization/ASTWriterStmt.cpp | 2 + test/OpenMP/for_reduction_codegen.cpp | 214 ++++++++++- test/OpenMP/for_reduction_messages.cpp | 18 + tools/libclang/CIndex.cpp | 3 + 16 files changed, 870 insertions(+), 190 deletions(-) diff --git a/include/clang/AST/DataRecursiveASTVisitor.h b/include/clang/AST/DataRecursiveASTVisitor.h index 12ba21645a..46dcb6ca7b 100644 --- a/include/clang/AST/DataRecursiveASTVisitor.h +++ b/include/clang/AST/DataRecursiveASTVisitor.h @@ -2664,6 +2664,9 @@ RecursiveASTVisitor::VisitOMPReductionClause(OMPReductionClause *C) { TRY_TO(TraverseNestedNameSpecifierLoc(C->getQualifierLoc())); TRY_TO(TraverseDeclarationNameInfo(C->getNameInfo())); TRY_TO(VisitOMPClauseList(C)); + for (auto *E : C->privates()) { + TRY_TO(TraverseStmt(E)); + } for (auto *E : C->lhs_exprs()) { TRY_TO(TraverseStmt(E)); } diff --git a/include/clang/AST/OpenMPClause.h b/include/clang/AST/OpenMPClause.h index 4aa2a3896b..648feb1428 100644 --- a/include/clang/AST/OpenMPClause.h +++ b/include/clang/AST/OpenMPClause.h @@ -1588,6 +1588,19 @@ class OMPReductionClause : public OMPVarListClause { /// \brief Sets the nested name specifier. void setQualifierLoc(NestedNameSpecifierLoc NSL) { QualifierLoc = NSL; } + /// \brief Set list of helper expressions, required for proper codegen of the + /// clause. These expressions represent private copy of the reduction + /// variable. + void setPrivates(ArrayRef Privates); + + /// \brief Get the list of helper privates. + MutableArrayRef getPrivates() { + return MutableArrayRef(varlist_end(), varlist_size()); + } + ArrayRef getPrivates() const { + return llvm::makeArrayRef(varlist_end(), varlist_size()); + } + /// \brief Set list of helper expressions, required for proper codegen of the /// clause. These expressions represent LHS expression in the final /// reduction expression performed by the reduction clause. @@ -1595,10 +1608,10 @@ class OMPReductionClause : public OMPVarListClause { /// \brief Get the list of helper LHS expressions. MutableArrayRef getLHSExprs() { - return MutableArrayRef(varlist_end(), varlist_size()); + return MutableArrayRef(getPrivates().end(), varlist_size()); } ArrayRef getLHSExprs() const { - return llvm::makeArrayRef(varlist_end(), varlist_size()); + return llvm::makeArrayRef(getPrivates().end(), varlist_size()); } /// \brief Set list of helper expressions, required for proper codegen of the @@ -1640,6 +1653,8 @@ public: /// \param VL The variables in the clause. /// \param QualifierLoc The nested-name qualifier with location information /// \param NameInfo The full name info for reduction identifier. + /// \param Privates List of helper expressions for proper generation of + /// private copies. /// \param LHSExprs List of helper expressions for proper generation of /// assignment operation required for copyprivate clause. This list represents /// LHSs of the reduction expressions. @@ -1662,8 +1677,9 @@ public: Create(const ASTContext &C, SourceLocation StartLoc, SourceLocation LParenLoc, SourceLocation ColonLoc, SourceLocation EndLoc, ArrayRef VL, NestedNameSpecifierLoc QualifierLoc, - const DeclarationNameInfo &NameInfo, ArrayRef LHSExprs, - ArrayRef RHSExprs, ArrayRef ReductionOps); + const DeclarationNameInfo &NameInfo, ArrayRef Privates, + ArrayRef LHSExprs, ArrayRef RHSExprs, + ArrayRef ReductionOps); /// \brief Creates an empty clause with the place for \a N variables. /// /// \param C AST context. @@ -1684,6 +1700,12 @@ public: typedef llvm::iterator_range helper_expr_const_range; + helper_expr_const_range privates() const { + return helper_expr_const_range(getPrivates().begin(), getPrivates().end()); + } + helper_expr_range privates() { + return helper_expr_range(getPrivates().begin(), getPrivates().end()); + } helper_expr_const_range lhs_exprs() const { return helper_expr_const_range(getLHSExprs().begin(), getLHSExprs().end()); } diff --git a/include/clang/AST/RecursiveASTVisitor.h b/include/clang/AST/RecursiveASTVisitor.h index 3066b0d1bb..2fabfa988e 100644 --- a/include/clang/AST/RecursiveASTVisitor.h +++ b/include/clang/AST/RecursiveASTVisitor.h @@ -2704,6 +2704,9 @@ RecursiveASTVisitor::VisitOMPReductionClause(OMPReductionClause *C) { TRY_TO(TraverseNestedNameSpecifierLoc(C->getQualifierLoc())); TRY_TO(TraverseDeclarationNameInfo(C->getNameInfo())); TRY_TO(VisitOMPClauseList(C)); + for (auto *E : C->privates()) { + TRY_TO(TraverseStmt(E)); + } for (auto *E : C->lhs_exprs()) { TRY_TO(TraverseStmt(E)); } diff --git a/include/clang/Basic/DiagnosticSemaKinds.td b/include/clang/Basic/DiagnosticSemaKinds.td index 584d734bf3..3ea950b207 100644 --- a/include/clang/Basic/DiagnosticSemaKinds.td +++ b/include/clang/Basic/DiagnosticSemaKinds.td @@ -7754,6 +7754,8 @@ def note_omp_ordered_param : Note< "'ordered' clause with specified parameter">; def err_omp_expected_array_sect_reduction_lb_not_zero : Error< "lower bound expected to be evaluated to zero">; +def err_omp_expected_base_var_name : Error< + "expected variable name as a base of the array %select{subscript|section}0">; } // end of OpenMP category let CategoryName = "Related Result Type Issue" in { diff --git a/lib/AST/Expr.cpp b/lib/AST/Expr.cpp index ccaeeab615..1dd54d293d 100644 --- a/lib/AST/Expr.cpp +++ b/lib/AST/Expr.cpp @@ -3984,6 +3984,10 @@ QualType OMPArraySectionExpr::getBaseOriginalType(Expr *Base) { Base = OASE->getBase(); ++ArraySectionCount; } + while (auto *ASE = dyn_cast(Base->IgnoreParens())) { + Base = ASE->getBase(); + ++ArraySectionCount; + } auto OriginalTy = Base->getType(); if (auto *DRE = dyn_cast(Base)) if (auto *PVD = dyn_cast(DRE->getDecl())) diff --git a/lib/AST/OpenMPClause.cpp b/lib/AST/OpenMPClause.cpp index 4446b55f4a..8146017268 100644 --- a/lib/AST/OpenMPClause.cpp +++ b/lib/AST/OpenMPClause.cpp @@ -340,11 +340,17 @@ OMPCopyprivateClause *OMPCopyprivateClause::CreateEmpty(const ASTContext &C, return new (Mem) OMPCopyprivateClause(N); } +void OMPReductionClause::setPrivates(ArrayRef Privates) { + assert(Privates.size() == varlist_size() && + "Number of private copies is not the same as the preallocated buffer"); + std::copy(Privates.begin(), Privates.end(), varlist_end()); +} + void OMPReductionClause::setLHSExprs(ArrayRef LHSExprs) { assert( LHSExprs.size() == varlist_size() && "Number of LHS expressions is not the same as the preallocated buffer"); - std::copy(LHSExprs.begin(), LHSExprs.end(), varlist_end()); + std::copy(LHSExprs.begin(), LHSExprs.end(), getPrivates().end()); } void OMPReductionClause::setRHSExprs(ArrayRef RHSExprs) { @@ -365,14 +371,15 @@ OMPReductionClause *OMPReductionClause::Create( const ASTContext &C, SourceLocation StartLoc, SourceLocation LParenLoc, SourceLocation EndLoc, SourceLocation ColonLoc, ArrayRef VL, NestedNameSpecifierLoc QualifierLoc, const DeclarationNameInfo &NameInfo, - ArrayRef LHSExprs, ArrayRef RHSExprs, - ArrayRef ReductionOps) { + ArrayRef Privates, ArrayRef LHSExprs, + ArrayRef RHSExprs, ArrayRef ReductionOps) { void *Mem = C.Allocate(llvm::RoundUpToAlignment(sizeof(OMPReductionClause), llvm::alignOf()) + - 4 * sizeof(Expr *) * VL.size()); + 5 * sizeof(Expr *) * VL.size()); OMPReductionClause *Clause = new (Mem) OMPReductionClause( StartLoc, LParenLoc, EndLoc, ColonLoc, VL.size(), QualifierLoc, NameInfo); Clause->setVarRefs(VL); + Clause->setPrivates(Privates); Clause->setLHSExprs(LHSExprs); Clause->setRHSExprs(RHSExprs); Clause->setReductionOps(ReductionOps); @@ -383,7 +390,7 @@ OMPReductionClause *OMPReductionClause::CreateEmpty(const ASTContext &C, unsigned N) { void *Mem = C.Allocate(llvm::RoundUpToAlignment(sizeof(OMPReductionClause), llvm::alignOf()) + - 4 * sizeof(Expr *) * N); + 5 * sizeof(Expr *) * N); return new (Mem) OMPReductionClause(N); } diff --git a/lib/AST/StmtProfile.cpp b/lib/AST/StmtProfile.cpp index c807f5e2f9..cf5289adc2 100644 --- a/lib/AST/StmtProfile.cpp +++ b/lib/AST/StmtProfile.cpp @@ -382,6 +382,9 @@ void OMPClauseProfiler::VisitOMPReductionClause( C->getQualifierLoc().getNestedNameSpecifier()); Profiler->VisitName(C->getNameInfo().getName()); VisitOMPClauseList(C); + for (auto *E : C->privates()) { + Profiler->VisitStmt(E); + } for (auto *E : C->lhs_exprs()) { Profiler->VisitStmt(E); } diff --git a/lib/CodeGen/CGOpenMPRuntime.cpp b/lib/CodeGen/CGOpenMPRuntime.cpp index aa7721bb16..c423557ace 100644 --- a/lib/CodeGen/CGOpenMPRuntime.cpp +++ b/lib/CodeGen/CGOpenMPRuntime.cpp @@ -922,6 +922,26 @@ CGOpenMPRuntime::createRuntimeFunction(OpenMPRTLFunction Function) { return RTLFn; } +static llvm::Value *getTypeSize(CodeGenFunction &CGF, QualType Ty) { + auto &C = CGF.getContext(); + llvm::Value *Size = nullptr; + auto SizeInChars = C.getTypeSizeInChars(Ty); + if (SizeInChars.isZero()) { + // getTypeSizeInChars() returns 0 for a VLA. + while (auto *VAT = C.getAsVariableArrayType(Ty)) { + llvm::Value *ArraySize; + std::tie(ArraySize, Ty) = CGF.getVLASize(VAT); + Size = Size ? CGF.Builder.CreateNUWMul(Size, ArraySize) : ArraySize; + } + SizeInChars = C.getTypeSizeInChars(Ty); + assert(!SizeInChars.isZero()); + Size = CGF.Builder.CreateNUWMul( + Size, llvm::ConstantInt::get(CGF.SizeTy, SizeInChars.getQuantity())); + } else + Size = llvm::ConstantInt::get(CGF.SizeTy, SizeInChars.getQuantity()); + return Size; +} + llvm::Constant *CGOpenMPRuntime::createForStaticInitFunction(unsigned IVSize, bool IVSigned) { assert((IVSize == 32 || IVSize == 64) && @@ -1438,17 +1458,16 @@ void CGOpenMPRuntime::emitTaskgroupRegion(CodeGenFunction &CGF, /// Given an array of pointers to variables, project the address of a /// given variable. -static Address emitAddrOfVarFromArray(CodeGenFunction &CGF, - Address Array, unsigned Index, - const VarDecl *Var) { +static Address emitAddrOfVarFromArray(CodeGenFunction &CGF, Address Array, + unsigned Index, const VarDecl *Var) { // Pull out the pointer to the variable. Address PtrAddr = - CGF.Builder.CreateConstArrayGEP(Array, Index, CGF.getPointerSize()); + CGF.Builder.CreateConstArrayGEP(Array, Index, CGF.getPointerSize()); llvm::Value *Ptr = CGF.Builder.CreateLoad(PtrAddr); Address Addr = Address(Ptr, CGF.getContext().getDeclAlign(Var)); - Addr = CGF.Builder.CreateElementBitCast(Addr, - CGF.ConvertTypeForMem(Var->getType())); + Addr = CGF.Builder.CreateElementBitCast( + Addr, CGF.ConvertTypeForMem(Var->getType())); return Addr; } @@ -1569,8 +1588,7 @@ void CGOpenMPRuntime::emitSingleRegion(CodeGenFunction &CGF, auto *CpyFn = emitCopyprivateCopyFunction( CGM, CGF.ConvertTypeForMem(CopyprivateArrayTy)->getPointerTo(), CopyprivateVars, SrcExprs, DstExprs, AssignmentOps); - auto *BufSize = llvm::ConstantInt::get( - CGM.SizeTy, C.getTypeSizeInChars(CopyprivateArrayTy).getQuantity()); + auto *BufSize = getTypeSize(CGF, CopyprivateArrayTy); Address CL = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(CopyprivateList, CGF.VoidPtrTy); @@ -2199,27 +2217,6 @@ emitTaskPrivateMappingFunction(CodeGenModule &CGM, SourceLocation Loc, return TaskPrivatesMap; } -static llvm::Value *getTypeSize(CodeGenFunction &CGF, QualType Ty) { - auto &C = CGF.getContext(); - llvm::Value *Size; - auto SizeInChars = C.getTypeSizeInChars(Ty); - if (SizeInChars.isZero()) { - // getTypeSizeInChars() returns 0 for a VLA. - Size = nullptr; - while (auto *VAT = C.getAsVariableArrayType(Ty)) { - llvm::Value *ArraySize; - std::tie(ArraySize, Ty) = CGF.getVLASize(VAT); - Size = Size ? CGF.Builder.CreateNUWMul(Size, ArraySize) : ArraySize; - } - SizeInChars = C.getTypeSizeInChars(Ty); - assert(!SizeInChars.isZero()); - Size = CGF.Builder.CreateNUWMul( - Size, llvm::ConstantInt::get(CGF.SizeTy, SizeInChars.getQuantity())); - } else - Size = llvm::ConstantInt::get(CGF.SizeTy, SizeInChars.getQuantity()); - return Size; -} - static int array_pod_sort_comparator(const PrivateDataTy *P1, const PrivateDataTy *P2) { return P1->first < P2->first ? 1 : (P2->first < P1->first ? -1 : 0); @@ -2277,8 +2274,7 @@ void CGOpenMPRuntime::emitTaskCall( C.getPointerType(KmpTaskTWithPrivatesQTy); auto *KmpTaskTWithPrivatesTy = CGF.ConvertType(KmpTaskTWithPrivatesQTy); auto *KmpTaskTWithPrivatesPtrTy = KmpTaskTWithPrivatesTy->getPointerTo(); - auto KmpTaskTWithPrivatesTySize = - CGM.getSize(C.getTypeSizeInChars(KmpTaskTWithPrivatesQTy)); + auto *KmpTaskTWithPrivatesTySize = getTypeSize(CGF, KmpTaskTWithPrivatesQTy); QualType SharedsPtrTy = C.getPointerType(SharedsTy); // Emit initial values for private copies (if any). @@ -2319,12 +2315,12 @@ void CGOpenMPRuntime::emitTaskCall( CGF.Builder.getInt32(/*C=*/0)) : CGF.Builder.getInt32(Final.getInt() ? FinalFlag : 0); TaskFlags = CGF.Builder.CreateOr(TaskFlags, CGF.Builder.getInt32(Flags)); - auto SharedsSize = C.getTypeSizeInChars(SharedsTy); - llvm::Value *AllocArgs[] = { - emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc), TaskFlags, - KmpTaskTWithPrivatesTySize, CGM.getSize(SharedsSize), - CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(TaskEntry, - KmpRoutineEntryPtrTy)}; + auto *SharedsSize = getTypeSize(CGF, SharedsTy); + llvm::Value *AllocArgs[] = {emitUpdateLocation(CGF, Loc), + getThreadID(CGF, Loc), TaskFlags, + KmpTaskTWithPrivatesTySize, SharedsSize, + CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( + TaskEntry, KmpRoutineEntryPtrTy)}; auto *NewTask = CGF.EmitRuntimeCall( createRuntimeFunction(OMPRTL__kmpc_omp_task_alloc), AllocArgs); auto *NewTaskNewTaskTTy = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( @@ -2442,8 +2438,8 @@ void CGOpenMPRuntime::emitTaskCall( enum RTLDependenceKindTy { DepIn = 1, DepOut = 2, DepInOut = 3 }; enum RTLDependInfoFieldsTy { BaseAddr, Len, Flags }; RecordDecl *KmpDependInfoRD; - QualType FlagsTy = C.getIntTypeForBitwidth( - C.toBits(C.getTypeSizeInChars(C.BoolTy)), /*Signed=*/false); + QualType FlagsTy = + C.getIntTypeForBitwidth(C.getTypeSize(C.BoolTy), /*Signed=*/false); llvm::Type *LLVMFlagsTy = CGF.ConvertTypeForMem(FlagsTy); if (KmpDependInfoTy.isNull()) { KmpDependInfoRD = C.buildImplicitRecord("kmp_depend_info"); @@ -2477,9 +2473,8 @@ void CGOpenMPRuntime::emitTaskCall( CGF.Builder.CreatePtrToInt(Addr.getPointer(), CGM.SizeTy); llvm::Value *UpIntPtr = CGF.Builder.CreatePtrToInt(UpAddr, CGM.SizeTy); Size = CGF.Builder.CreateNUWSub(UpIntPtr, LowIntPtr); - } else { + } else Size = getTypeSize(CGF, Ty); - } auto Base = CGF.MakeAddrLValue( CGF.Builder.CreateConstArrayGEP(DependenciesArray, i, DependencySize), KmpDependInfoTy); @@ -2596,8 +2591,89 @@ void CGOpenMPRuntime::emitTaskCall( } } +/// \brief Emit reduction operation for each element of array (required for +/// array sections) LHS op = RHS. +/// \param Type Type of array. +/// \param LHSVar Variable on the left side of the reduction operation +/// (references element of array in original variable). +/// \param RHSVar Variable on the right side of the reduction operation +/// (references element of array in original variable). +/// \param RedOpGen Generator of reduction operation with use of LHSVar and +/// RHSVar. +void EmitOMPAggregateReduction( + CodeGenFunction &CGF, QualType Type, const VarDecl *LHSVar, + const VarDecl *RHSVar, + const llvm::function_ref &RedOpGen, + const Expr *XExpr = nullptr, const Expr *EExpr = nullptr, + const Expr *UpExpr = nullptr) { + // Perform element-by-element initialization. + QualType ElementTy; + Address LHSAddr = CGF.GetAddrOfLocalVar(LHSVar); + Address RHSAddr = CGF.GetAddrOfLocalVar(RHSVar); + + // Drill down to the base element type on both arrays. + auto ArrayTy = Type->getAsArrayTypeUnsafe(); + auto NumElements = CGF.emitArrayLength(ArrayTy, ElementTy, LHSAddr); + + auto RHSBegin = RHSAddr.getPointer(); + auto LHSBegin = LHSAddr.getPointer(); + // Cast from pointer to array type to pointer to single element. + auto LHSEnd = CGF.Builder.CreateGEP(LHSBegin, NumElements); + // The basic structure here is a while-do loop. + auto BodyBB = CGF.createBasicBlock("omp.arraycpy.body"); + auto DoneBB = CGF.createBasicBlock("omp.arraycpy.done"); + auto IsEmpty = + CGF.Builder.CreateICmpEQ(LHSBegin, LHSEnd, "omp.arraycpy.isempty"); + CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB); + + // Enter the loop body, making that address the current address. + auto EntryBB = CGF.Builder.GetInsertBlock(); + CGF.EmitBlock(BodyBB); + + CharUnits ElementSize = CGF.getContext().getTypeSizeInChars(ElementTy); + + llvm::PHINode *RHSElementPHI = CGF.Builder.CreatePHI( + RHSBegin->getType(), 2, "omp.arraycpy.srcElementPast"); + RHSElementPHI->addIncoming(RHSBegin, EntryBB); + Address RHSElementCurrent = + Address(RHSElementPHI, + RHSAddr.getAlignment().alignmentOfArrayElement(ElementSize)); + + llvm::PHINode *LHSElementPHI = CGF.Builder.CreatePHI( + LHSBegin->getType(), 2, "omp.arraycpy.destElementPast"); + LHSElementPHI->addIncoming(LHSBegin, EntryBB); + Address LHSElementCurrent = + Address(LHSElementPHI, + LHSAddr.getAlignment().alignmentOfArrayElement(ElementSize)); + + // Emit copy. + CodeGenFunction::OMPPrivateScope Scope(CGF); + Scope.addPrivate(LHSVar, [=]() -> Address { return LHSElementCurrent; }); + Scope.addPrivate(RHSVar, [=]() -> Address { return RHSElementCurrent; }); + Scope.Privatize(); + RedOpGen(CGF, XExpr, EExpr, UpExpr); + Scope.ForceCleanup(); + + // Shift the address forward by one element. + auto LHSElementNext = CGF.Builder.CreateConstGEP1_32( + LHSElementPHI, /*Idx0=*/1, "omp.arraycpy.dest.element"); + auto RHSElementNext = CGF.Builder.CreateConstGEP1_32( + RHSElementPHI, /*Idx0=*/1, "omp.arraycpy.src.element"); + // Check whether we've reached the end. + auto Done = + CGF.Builder.CreateICmpEQ(LHSElementNext, LHSEnd, "omp.arraycpy.done"); + CGF.Builder.CreateCondBr(Done, DoneBB, BodyBB); + LHSElementPHI->addIncoming(LHSElementNext, CGF.Builder.GetInsertBlock()); + RHSElementPHI->addIncoming(RHSElementNext, CGF.Builder.GetInsertBlock()); + + // Done. + CGF.EmitBlock(DoneBB, /*IsFinished=*/true); +} + static llvm::Value *emitReductionFunction(CodeGenModule &CGM, llvm::Type *ArgsType, + ArrayRef Privates, ArrayRef LHSExprs, ArrayRef RHSExprs, ArrayRef ReductionOps) { @@ -2634,19 +2710,49 @@ static llvm::Value *emitReductionFunction(CodeGenModule &CGM, // *(Type*)lhs[i] = RedOp(*(Type*)lhs[i], *(Type*)rhs[i]); // ... CodeGenFunction::OMPPrivateScope Scope(CGF); - for (unsigned I = 0, E = ReductionOps.size(); I < E; ++I) { + auto IPriv = Privates.begin(); + unsigned Idx = 0; + for (unsigned I = 0, E = ReductionOps.size(); I < E; ++I, ++IPriv, ++Idx) { auto RHSVar = cast(cast(RHSExprs[I])->getDecl()); Scope.addPrivate(RHSVar, [&]() -> Address { - return emitAddrOfVarFromArray(CGF, RHS, I, RHSVar); + return emitAddrOfVarFromArray(CGF, RHS, Idx, RHSVar); }); auto LHSVar = cast(cast(LHSExprs[I])->getDecl()); Scope.addPrivate(LHSVar, [&]() -> Address { - return emitAddrOfVarFromArray(CGF, LHS, I, LHSVar); + return emitAddrOfVarFromArray(CGF, LHS, Idx, LHSVar); }); + QualType PrivTy = (*IPriv)->getType(); + if (PrivTy->isArrayType()) { + // Get array size and emit VLA type. + ++Idx; + Address Elem = + CGF.Builder.CreateConstArrayGEP(LHS, Idx, CGF.getPointerSize()); + llvm::Value *Ptr = CGF.Builder.CreateLoad(Elem); + CodeGenFunction::OpaqueValueMapping OpaqueMap( + CGF, + cast( + CGF.getContext().getAsVariableArrayType(PrivTy)->getSizeExpr()), + RValue::get(CGF.Builder.CreatePtrToInt(Ptr, CGF.SizeTy))); + CGF.EmitVariablyModifiedType(PrivTy); + } } Scope.Privatize(); + IPriv = Privates.begin(); + auto ILHS = LHSExprs.begin(); + auto IRHS = RHSExprs.begin(); for (auto *E : ReductionOps) { - CGF.EmitIgnoredExpr(E); + if ((*IPriv)->getType()->isArrayType()) { + // Emit reduction for array section. + auto *LHSVar = cast(cast(*ILHS)->getDecl()); + auto *RHSVar = cast(cast(*IRHS)->getDecl()); + EmitOMPAggregateReduction(CGF, (*IPriv)->getType(), LHSVar, RHSVar, + [=](CodeGenFunction &CGF, const Expr *, + const Expr *, + const Expr *) { CGF.EmitIgnoredExpr(E); }); + } else + // Emit reduction for array subscript or single variable. + CGF.EmitIgnoredExpr(E); + ++IPriv, ++ILHS, ++IRHS; } Scope.ForceCleanup(); CGF.FinishFunction(); @@ -2654,6 +2760,7 @@ static llvm::Value *emitReductionFunction(CodeGenModule &CGM, } void CGOpenMPRuntime::emitReduction(CodeGenFunction &CGF, SourceLocation Loc, + ArrayRef Privates, ArrayRef LHSExprs, ArrayRef RHSExprs, ArrayRef ReductionOps, @@ -2697,33 +2804,68 @@ void CGOpenMPRuntime::emitReduction(CodeGenFunction &CGF, SourceLocation Loc, if (SimpleReduction) { CodeGenFunction::RunCleanupsScope Scope(CGF); + auto IPriv = Privates.begin(); + auto ILHS = LHSExprs.begin(); + auto IRHS = RHSExprs.begin(); for (auto *E : ReductionOps) { - CGF.EmitIgnoredExpr(E); + if ((*IPriv)->getType()->isArrayType()) { + auto *LHSVar = cast(cast(*ILHS)->getDecl()); + auto *RHSVar = cast(cast(*IRHS)->getDecl()); + EmitOMPAggregateReduction( + CGF, (*IPriv)->getType(), LHSVar, RHSVar, + [=](CodeGenFunction &CGF, const Expr *, const Expr *, + const Expr *) { CGF.EmitIgnoredExpr(E); }); + } else + CGF.EmitIgnoredExpr(E); + ++IPriv, ++ILHS, ++IRHS; } return; } // 1. Build a list of reduction variables. // void *RedList[] = {[0], ..., [-1]}; - llvm::APInt ArraySize(/*unsigned int numBits=*/32, RHSExprs.size()); + auto Size = RHSExprs.size(); + for (auto *E : Privates) { + if (E->getType()->isArrayType()) + // Reserve place for array size. + ++Size; + } + llvm::APInt ArraySize(/*unsigned int numBits=*/32, Size); QualType ReductionArrayTy = C.getConstantArrayType(C.VoidPtrTy, ArraySize, ArrayType::Normal, /*IndexTypeQuals=*/0); Address ReductionList = CGF.CreateMemTemp(ReductionArrayTy, ".omp.reduction.red_list"); - for (unsigned I = 0, E = RHSExprs.size(); I < E; ++I) { + auto IPriv = Privates.begin(); + unsigned Idx = 0; + for (unsigned I = 0, E = RHSExprs.size(); I < E; ++I, ++IPriv, ++Idx) { Address Elem = - CGF.Builder.CreateConstArrayGEP(ReductionList, I, CGF.getPointerSize()); + CGF.Builder.CreateConstArrayGEP(ReductionList, Idx, CGF.getPointerSize()); CGF.Builder.CreateStore( CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( CGF.EmitLValue(RHSExprs[I]).getPointer(), CGF.VoidPtrTy), Elem); + if ((*IPriv)->getType()->isArrayType()) { + // Store array size. + ++Idx; + Elem = CGF.Builder.CreateConstArrayGEP(ReductionList, Idx, + CGF.getPointerSize()); + CGF.Builder.CreateStore( + CGF.Builder.CreateIntToPtr( + CGF.Builder.CreateIntCast( + CGF.getVLASize(CGF.getContext().getAsVariableArrayType( + (*IPriv)->getType())) + .first, + CGF.SizeTy, /*isSigned=*/false), + CGF.VoidPtrTy), + Elem); + } } // 2. Emit reduce_func(). auto *ReductionFn = emitReductionFunction( - CGM, CGF.ConvertTypeForMem(ReductionArrayTy)->getPointerTo(), LHSExprs, - RHSExprs, ReductionOps); + CGM, CGF.ConvertTypeForMem(ReductionArrayTy)->getPointerTo(), Privates, + LHSExprs, RHSExprs, ReductionOps); // 3. Create static kmp_critical_name lock = { 0 }; auto *Lock = getCriticalRegionLock(".reduction"); @@ -2734,8 +2876,7 @@ void CGOpenMPRuntime::emitReduction(CodeGenFunction &CGF, SourceLocation Loc, CGF, Loc, static_cast(OMP_IDENT_KMPC | OMP_ATOMIC_REDUCE)); auto *ThreadId = getThreadID(CGF, Loc); - auto *ReductionArrayTySize = llvm::ConstantInt::get( - CGM.SizeTy, C.getTypeSizeInChars(ReductionArrayTy).getQuantity()); + auto *ReductionArrayTySize = getTypeSize(CGF, ReductionArrayTy); auto *RL = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(ReductionList.getPointer(), CGF.VoidPtrTy); @@ -2781,8 +2922,22 @@ void CGOpenMPRuntime::emitReduction(CodeGenFunction &CGF, SourceLocation Loc, createRuntimeFunction(WithNowait ? OMPRTL__kmpc_end_reduce_nowait : OMPRTL__kmpc_end_reduce), llvm::makeArrayRef(EndArgs)); + auto IPriv = Privates.begin(); + auto ILHS = LHSExprs.begin(); + auto IRHS = RHSExprs.begin(); for (auto *E : ReductionOps) { - CGF.EmitIgnoredExpr(E); + if ((*IPriv)->getType()->isArrayType()) { + // Emit reduction for array section. + auto *LHSVar = cast(cast(*ILHS)->getDecl()); + auto *RHSVar = cast(cast(*IRHS)->getDecl()); + EmitOMPAggregateReduction( + CGF, (*IPriv)->getType(), LHSVar, RHSVar, + [=](CodeGenFunction &CGF, const Expr *, const Expr *, + const Expr *) { CGF.EmitIgnoredExpr(E); }); + } else + // Emit reduction for array subscript or single variable. + CGF.EmitIgnoredExpr(E); + ++IPriv, ++ILHS, ++IRHS; } } @@ -2812,62 +2967,84 @@ void CGOpenMPRuntime::emitReduction(CodeGenFunction &CGF, SourceLocation Loc, createRuntimeFunction(OMPRTL__kmpc_end_reduce), llvm::makeArrayRef(EndArgs)); } - auto I = LHSExprs.begin(); + auto ILHS = LHSExprs.begin(); + auto IRHS = RHSExprs.begin(); + auto IPriv = Privates.begin(); for (auto *E : ReductionOps) { - const Expr *XExpr = nullptr; - const Expr *EExpr = nullptr; - const Expr *UpExpr = nullptr; - BinaryOperatorKind BO = BO_Comma; - if (auto *BO = dyn_cast(E)) { - if (BO->getOpcode() == BO_Assign) { - XExpr = BO->getLHS(); - UpExpr = BO->getRHS(); - } - } - // Try to emit update expression as a simple atomic. - auto *RHSExpr = UpExpr; - if (RHSExpr) { - // Analyze RHS part of the whole expression. - if (auto *ACO = dyn_cast( - RHSExpr->IgnoreParenImpCasts())) { - // If this is a conditional operator, analyze its condition for - // min/max reduction operator. - RHSExpr = ACO->getCond(); + const Expr *XExpr = nullptr; + const Expr *EExpr = nullptr; + const Expr *UpExpr = nullptr; + BinaryOperatorKind BO = BO_Comma; + if (auto *BO = dyn_cast(E)) { + if (BO->getOpcode() == BO_Assign) { + XExpr = BO->getLHS(); + UpExpr = BO->getRHS(); + } } - if (auto *BORHS = - dyn_cast(RHSExpr->IgnoreParenImpCasts())) { - EExpr = BORHS->getRHS(); - BO = BORHS->getOpcode(); + // Try to emit update expression as a simple atomic. + auto *RHSExpr = UpExpr; + if (RHSExpr) { + // Analyze RHS part of the whole expression. + if (auto *ACO = dyn_cast( + RHSExpr->IgnoreParenImpCasts())) { + // If this is a conditional operator, analyze its condition for + // min/max reduction operator. + RHSExpr = ACO->getCond(); + } + if (auto *BORHS = + dyn_cast(RHSExpr->IgnoreParenImpCasts())) { + EExpr = BORHS->getRHS(); + BO = BORHS->getOpcode(); + } } - } - if (XExpr) { - auto *VD = cast(cast(*I)->getDecl()); - LValue X = CGF.EmitLValue(XExpr); - RValue E; - if (EExpr) - E = CGF.EmitAnyExpr(EExpr); - CGF.EmitOMPAtomicSimpleUpdateExpr( - X, E, BO, /*IsXLHSInRHSPart=*/true, llvm::Monotonic, Loc, - [&CGF, UpExpr, VD](RValue XRValue) { - CodeGenFunction::OMPPrivateScope PrivateScope(CGF); - PrivateScope.addPrivate( - VD, [&CGF, VD, XRValue]() -> Address { + if (XExpr) { + auto *VD = cast(cast(*ILHS)->getDecl()); + auto &&AtomicRedGen = [this, BO, VD, IPriv, + Loc](CodeGenFunction &CGF, const Expr *XExpr, + const Expr *EExpr, const Expr *UpExpr) { + LValue X = CGF.EmitLValue(XExpr); + RValue E; + if (EExpr) + E = CGF.EmitAnyExpr(EExpr); + CGF.EmitOMPAtomicSimpleUpdateExpr( + X, E, BO, /*IsXLHSInRHSPart=*/true, llvm::Monotonic, Loc, + [&CGF, UpExpr, VD, IPriv](RValue XRValue) { + CodeGenFunction::OMPPrivateScope PrivateScope(CGF); + PrivateScope.addPrivate(VD, [&CGF, VD, XRValue]() -> Address { Address LHSTemp = CGF.CreateMemTemp(VD->getType()); CGF.EmitStoreThroughLValue( - XRValue, - CGF.MakeAddrLValue(LHSTemp, VD->getType())); + XRValue, CGF.MakeAddrLValue(LHSTemp, VD->getType())); return LHSTemp; }); - (void)PrivateScope.Privatize(); - return CGF.EmitAnyExpr(UpExpr); - }); - } else { - // Emit as a critical region. - emitCriticalRegion(CGF, ".atomic_reduction", [E](CodeGenFunction &CGF) { - CGF.EmitIgnoredExpr(E); - }, Loc); - } - ++I; + (void)PrivateScope.Privatize(); + return CGF.EmitAnyExpr(UpExpr); + }); + }; + if ((*IPriv)->getType()->isArrayType()) { + // Emit atomic reduction for array section. + auto *RHSVar = cast(cast(*IRHS)->getDecl()); + EmitOMPAggregateReduction(CGF, (*IPriv)->getType(), VD, RHSVar, + AtomicRedGen, XExpr, EExpr, UpExpr); + } else + // Emit atomic reduction for array subscript or single variable. + AtomicRedGen(CGF, XExpr, EExpr, UpExpr); + } else { + // Emit as a critical region. + auto &&CritRedGen = [this, E, Loc](CodeGenFunction &CGF, const Expr *, + const Expr *, const Expr *) { + emitCriticalRegion( + CGF, ".atomic_reduction", + [E](CodeGenFunction &CGF) { CGF.EmitIgnoredExpr(E); }, Loc); + }; + if ((*IPriv)->getType()->isArrayType()) { + auto *LHSVar = cast(cast(*ILHS)->getDecl()); + auto *RHSVar = cast(cast(*IRHS)->getDecl()); + EmitOMPAggregateReduction(CGF, (*IPriv)->getType(), LHSVar, RHSVar, + CritRedGen); + } else + CritRedGen(CGF, nullptr, nullptr, nullptr); + } + ++ILHS, ++IRHS, ++IPriv; } } diff --git a/lib/CodeGen/CGOpenMPRuntime.h b/lib/CodeGen/CGOpenMPRuntime.h index ec128de4e9..dc4238cda6 100644 --- a/lib/CodeGen/CGOpenMPRuntime.h +++ b/lib/CodeGen/CGOpenMPRuntime.h @@ -702,6 +702,7 @@ public: /// } /// \endcode /// + /// \param Privates List of private copies for original reduction arguments. /// \param LHSExprs List of LHS in \a ReductionOps reduction operations. /// \param RHSExprs List of RHS in \a ReductionOps reduction operations. /// \param ReductionOps List of reduction operations in form 'LHS binop RHS' @@ -709,6 +710,7 @@ public: /// \param WithNowait true if parent directive has also nowait clause, false /// otherwise. virtual void emitReduction(CodeGenFunction &CGF, SourceLocation Loc, + ArrayRef Privates, ArrayRef LHSExprs, ArrayRef RHSExprs, ArrayRef ReductionOps, diff --git a/lib/CodeGen/CGStmtOpenMP.cpp b/lib/CodeGen/CGStmtOpenMP.cpp index cf54d0ee45..8f7dfaa7be 100644 --- a/lib/CodeGen/CGStmtOpenMP.cpp +++ b/lib/CodeGen/CGStmtOpenMP.cpp @@ -212,6 +212,65 @@ void CodeGenFunction::EmitOMPAggregateAssign( EmitBlock(DoneBB, /*IsFinished=*/true); } +/// \brief Emit initialization of arrays of complex types. +/// \param Type Type of array. +/// \param DestAddr Address of the array. +/// \param Type Type of array. +/// \param Init Initial expression of array. +static void EmitOMPAggregateInit(CodeGenFunction &CGF, Address DestAddr, + QualType Type, const Expr *Init) { + // Perform element-by-element initialization. + QualType ElementTy; + + // Drill down to the base element type on both arrays. + auto ArrayTy = Type->getAsArrayTypeUnsafe(); + auto NumElements = CGF.emitArrayLength(ArrayTy, ElementTy, DestAddr); + DestAddr = + CGF.Builder.CreateElementBitCast(DestAddr, DestAddr.getElementType()); + + auto DestBegin = DestAddr.getPointer(); + // Cast from pointer to array type to pointer to single element. + auto DestEnd = CGF.Builder.CreateGEP(DestBegin, NumElements); + // The basic structure here is a while-do loop. + auto BodyBB = CGF.createBasicBlock("omp.arrayinit.body"); + auto DoneBB = CGF.createBasicBlock("omp.arrayinit.done"); + auto IsEmpty = + CGF.Builder.CreateICmpEQ(DestBegin, DestEnd, "omp.arrayinit.isempty"); + CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB); + + // Enter the loop body, making that address the current address. + auto EntryBB = CGF.Builder.GetInsertBlock(); + CGF.EmitBlock(BodyBB); + + CharUnits ElementSize = CGF.getContext().getTypeSizeInChars(ElementTy); + + llvm::PHINode *DestElementPHI = CGF.Builder.CreatePHI( + DestBegin->getType(), 2, "omp.arraycpy.destElementPast"); + DestElementPHI->addIncoming(DestBegin, EntryBB); + Address DestElementCurrent = + Address(DestElementPHI, + DestAddr.getAlignment().alignmentOfArrayElement(ElementSize)); + + // Emit copy. + { + CodeGenFunction::RunCleanupsScope InitScope(CGF); + CGF.EmitAnyExprToMem(Init, DestElementCurrent, ElementTy.getQualifiers(), + /*IsInitializer=*/false); + } + + // Shift the address forward by one element. + auto DestElementNext = CGF.Builder.CreateConstGEP1_32( + DestElementPHI, /*Idx0=*/1, "omp.arraycpy.dest.element"); + // Check whether we've reached the end. + auto Done = + CGF.Builder.CreateICmpEQ(DestElementNext, DestEnd, "omp.arraycpy.done"); + CGF.Builder.CreateCondBr(Done, DoneBB, BodyBB); + DestElementPHI->addIncoming(DestElementNext, CGF.Builder.GetInsertBlock()); + + // Done. + CGF.EmitBlock(DoneBB, /*IsFinished=*/true); +} + void CodeGenFunction::EmitOMPCopy(QualType OriginalType, Address DestAddr, Address SrcAddr, const VarDecl *DestVD, const VarDecl *SrcVD, const Expr *Copy) { @@ -546,41 +605,167 @@ void CodeGenFunction::EmitOMPReductionClauseInit( for (const auto *C : D.getClausesOfKind()) { auto ILHS = C->lhs_exprs().begin(); auto IRHS = C->rhs_exprs().begin(); + auto IPriv = C->privates().begin(); for (auto IRef : C->varlists()) { - auto *OrigVD = cast(cast(IRef)->getDecl()); auto *LHSVD = cast(cast(*ILHS)->getDecl()); - auto *PrivateVD = cast(cast(*IRHS)->getDecl()); - // Store the address of the original variable associated with the LHS - // implicit variable. - PrivateScope.addPrivate(LHSVD, [this, OrigVD, IRef]() -> Address { - DeclRefExpr DRE(const_cast(OrigVD), - CapturedStmtInfo->lookup(OrigVD) != nullptr, - IRef->getType(), VK_LValue, IRef->getExprLoc()); - return EmitLValue(&DRE).getAddress(); - }); - // Emit reduction copy. - bool IsRegistered = - PrivateScope.addPrivate(OrigVD, [this, PrivateVD]() -> Address { - // Emit private VarDecl with reduction init. - EmitDecl(*PrivateVD); - return GetAddrOfLocalVar(PrivateVD); - }); - assert(IsRegistered && "private var already registered as private"); - // Silence the warning about unused variable. - (void)IsRegistered; - ++ILHS, ++IRHS; + auto *RHSVD = cast(cast(*IRHS)->getDecl()); + auto *PrivateVD = cast(cast(*IPriv)->getDecl()); + if (auto *OASE = dyn_cast(IRef)) { + auto *Base = OASE->getBase()->IgnoreParenImpCasts(); + while (auto *TempOASE = dyn_cast(Base)) + Base = TempOASE->getBase()->IgnoreParenImpCasts(); + while (auto *TempASE = dyn_cast(Base)) + Base = TempASE->getBase()->IgnoreParenImpCasts(); + auto *DE = cast(Base); + auto *OrigVD = cast(DE->getDecl()); + auto OASELValueLB = EmitOMPArraySectionExpr(OASE); + auto OASELValueUB = + EmitOMPArraySectionExpr(OASE, /*IsLowerBound=*/false); + auto OriginalBaseLValue = EmitLValue(DE); + auto BaseLValue = OriginalBaseLValue; + auto *Zero = Builder.getInt64(/*C=*/0); + llvm::SmallVector Indexes; + Indexes.push_back(Zero); + auto *ItemTy = + OASELValueLB.getPointer()->getType()->getPointerElementType(); + auto *Ty = BaseLValue.getPointer()->getType()->getPointerElementType(); + while (Ty != ItemTy) { + Indexes.push_back(Zero); + Ty = Ty->getPointerElementType(); + } + BaseLValue = MakeAddrLValue( + Address(Builder.CreateInBoundsGEP(BaseLValue.getPointer(), Indexes), + OASELValueLB.getAlignment()), + OASELValueLB.getType(), OASELValueLB.getAlignmentSource()); + // Store the address of the original variable associated with the LHS + // implicit variable. + PrivateScope.addPrivate(LHSVD, [this, OASELValueLB]() -> Address { + return OASELValueLB.getAddress(); + }); + // Emit reduction copy. + bool IsRegistered = PrivateScope.addPrivate( + OrigVD, [this, PrivateVD, BaseLValue, OASELValueLB, OASELValueUB, + OriginalBaseLValue]() -> Address { + // Emit VarDecl with copy init for arrays. + // Get the address of the original variable captured in current + // captured region. + auto *Size = Builder.CreatePtrDiff(OASELValueUB.getPointer(), + OASELValueLB.getPointer()); + Size = Builder.CreateNUWAdd( + Size, llvm::ConstantInt::get(Size->getType(), /*V=*/1)); + CodeGenFunction::OpaqueValueMapping OpaqueMap( + *this, cast( + getContext() + .getAsVariableArrayType(PrivateVD->getType()) + ->getSizeExpr()), + RValue::get(Size)); + EmitVariablyModifiedType(PrivateVD->getType()); + auto Emission = EmitAutoVarAlloca(*PrivateVD); + auto Addr = Emission.getAllocatedAddress(); + auto *Init = PrivateVD->getInit(); + EmitOMPAggregateInit(*this, Addr, PrivateVD->getType(), Init); + EmitAutoVarCleanups(Emission); + // Emit private VarDecl with reduction init. + auto *Offset = Builder.CreatePtrDiff(BaseLValue.getPointer(), + OASELValueLB.getPointer()); + auto *Ptr = Builder.CreateGEP(Addr.getPointer(), Offset); + Ptr = Builder.CreatePointerBitCastOrAddrSpaceCast( + Ptr, OriginalBaseLValue.getPointer()->getType()); + return Address(Ptr, OriginalBaseLValue.getAlignment()); + }); + assert(IsRegistered && "private var already registered as private"); + // Silence the warning about unused variable. + (void)IsRegistered; + PrivateScope.addPrivate(RHSVD, [this, PrivateVD]() -> Address { + return GetAddrOfLocalVar(PrivateVD); + }); + } else if (auto *ASE = dyn_cast(IRef)) { + auto *Base = ASE->getBase()->IgnoreParenImpCasts(); + while (auto *TempASE = dyn_cast(Base)) + Base = TempASE->getBase()->IgnoreParenImpCasts(); + auto *DE = cast(Base); + auto *OrigVD = cast(DE->getDecl()); + auto ASELValue = EmitLValue(ASE); + auto OriginalBaseLValue = EmitLValue(DE); + auto BaseLValue = OriginalBaseLValue; + auto *Zero = Builder.getInt64(/*C=*/0); + llvm::SmallVector Indexes; + Indexes.push_back(Zero); + auto *ItemTy = + ASELValue.getPointer()->getType()->getPointerElementType(); + auto *Ty = BaseLValue.getPointer()->getType()->getPointerElementType(); + while (Ty != ItemTy) { + Indexes.push_back(Zero); + Ty = Ty->getPointerElementType(); + } + BaseLValue = MakeAddrLValue( + Address(Builder.CreateInBoundsGEP(BaseLValue.getPointer(), Indexes), + ASELValue.getAlignment()), + ASELValue.getType(), ASELValue.getAlignmentSource()); + // Store the address of the original variable associated with the LHS + // implicit variable. + PrivateScope.addPrivate(LHSVD, [this, ASELValue]() -> Address { + return ASELValue.getAddress(); + }); + // Emit reduction copy. + bool IsRegistered = PrivateScope.addPrivate( + OrigVD, [this, PrivateVD, BaseLValue, ASELValue, + OriginalBaseLValue]() -> Address { + // Emit private VarDecl with reduction init. + EmitDecl(*PrivateVD); + auto Addr = GetAddrOfLocalVar(PrivateVD); + auto *Offset = Builder.CreatePtrDiff(BaseLValue.getPointer(), + ASELValue.getPointer()); + auto *Ptr = Builder.CreateGEP(Addr.getPointer(), Offset); + Ptr = Builder.CreatePointerBitCastOrAddrSpaceCast( + Ptr, OriginalBaseLValue.getPointer()->getType()); + return Address(Ptr, OriginalBaseLValue.getAlignment()); + }); + assert(IsRegistered && "private var already registered as private"); + // Silence the warning about unused variable. + (void)IsRegistered; + PrivateScope.addPrivate(RHSVD, [this, PrivateVD]() -> Address { + return GetAddrOfLocalVar(PrivateVD); + }); + } else { + auto *OrigVD = cast(cast(IRef)->getDecl()); + // Store the address of the original variable associated with the LHS + // implicit variable. + PrivateScope.addPrivate(LHSVD, [this, OrigVD, IRef]() -> Address { + DeclRefExpr DRE(const_cast(OrigVD), + CapturedStmtInfo->lookup(OrigVD) != nullptr, + IRef->getType(), VK_LValue, IRef->getExprLoc()); + return EmitLValue(&DRE).getAddress(); + }); + // Emit reduction copy. + bool IsRegistered = + PrivateScope.addPrivate(OrigVD, [this, PrivateVD]() -> Address { + // Emit private VarDecl with reduction init. + EmitDecl(*PrivateVD); + return GetAddrOfLocalVar(PrivateVD); + }); + assert(IsRegistered && "private var already registered as private"); + // Silence the warning about unused variable. + (void)IsRegistered; + PrivateScope.addPrivate(RHSVD, [this, PrivateVD]() -> Address { + return GetAddrOfLocalVar(PrivateVD); + }); + } + ++ILHS, ++IRHS, ++IPriv; } } } void CodeGenFunction::EmitOMPReductionClauseFinal( const OMPExecutableDirective &D) { + llvm::SmallVector Privates; llvm::SmallVector LHSExprs; llvm::SmallVector RHSExprs; llvm::SmallVector ReductionOps; bool HasAtLeastOneReduction = false; for (const auto *C : D.getClausesOfKind()) { HasAtLeastOneReduction = true; + Privates.append(C->privates().begin(), C->privates().end()); LHSExprs.append(C->lhs_exprs().begin(), C->lhs_exprs().end()); RHSExprs.append(C->rhs_exprs().begin(), C->rhs_exprs().end()); ReductionOps.append(C->reduction_ops().begin(), C->reduction_ops().end()); @@ -589,7 +774,7 @@ void CodeGenFunction::EmitOMPReductionClauseFinal( // Emit nowait reduction if nowait clause is present or directive is a // parallel directive (it always has implicit barrier). CGM.getOpenMPRuntime().emitReduction( - *this, D.getLocEnd(), LHSExprs, RHSExprs, ReductionOps, + *this, D.getLocEnd(), Privates, LHSExprs, RHSExprs, ReductionOps, D.getSingleClause() || isOpenMPParallelDirective(D.getDirectiveKind()) || D.getDirectiveKind() == OMPD_simd, diff --git a/lib/Sema/SemaOpenMP.cpp b/lib/Sema/SemaOpenMP.cpp index 3fe9100d88..c4e1b26e7c 100644 --- a/lib/Sema/SemaOpenMP.cpp +++ b/lib/Sema/SemaOpenMP.cpp @@ -6047,6 +6047,8 @@ OMPClause *Sema::ActOnOpenMPFirstprivateClause(ArrayRef VarList, VDPrivate->setInvalidDecl(); else VDPrivate->setInit(Result.getAs()); + // Remove temp variable declaration. + Context.Deallocate(VDInitTemp); } else { auto *VDInit = buildVarDecl(*this, DE->getLocStart(), Type, ".firstprivate.temp"); @@ -6407,6 +6409,7 @@ OMPClause *Sema::ActOnOpenMPReductionClause( } SmallVector Vars; + SmallVector Privates; SmallVector LHSs; SmallVector RHSs; SmallVector ReductionOps; @@ -6415,6 +6418,7 @@ OMPClause *Sema::ActOnOpenMPReductionClause( if (isa(RefExpr)) { // It will be analyzed later. Vars.push_back(RefExpr); + Privates.push_back(nullptr); LHSs.push_back(nullptr); RHSs.push_back(nullptr); ReductionOps.push_back(nullptr); @@ -6426,6 +6430,7 @@ OMPClause *Sema::ActOnOpenMPReductionClause( RefExpr->containsUnexpandedParameterPack()) { // It will be analyzed later. Vars.push_back(RefExpr); + Privates.push_back(nullptr); LHSs.push_back(nullptr); RHSs.push_back(nullptr); ReductionOps.push_back(nullptr); @@ -6454,24 +6459,38 @@ OMPClause *Sema::ActOnOpenMPReductionClause( auto D = DE->getDecl(); VD = cast(D); Type = VD->getType(); - } else if (ASE) + } else if (ASE) { Type = ASE->getType(); - else if (OASE) { + auto *Base = ASE->getBase()->IgnoreParenImpCasts(); + while (auto *TempASE = dyn_cast(Base)) + Base = TempASE->getBase()->IgnoreParenImpCasts(); + DE = dyn_cast(Base); + if (DE) + VD = dyn_cast(DE->getDecl()); + if (!VD) { + Diag(Base->getExprLoc(), diag::err_omp_expected_base_var_name) + << 0 << Base->getSourceRange(); + continue; + } + } else if (OASE) { auto BaseType = OMPArraySectionExpr::getBaseOriginalType(OASE->getBase()); if (auto *ATy = BaseType->getAsArrayTypeUnsafe()) Type = ATy->getElementType(); else Type = BaseType->getPointeeType(); - } - // OpenMP [2.15.3.6, reduction Clause] - // If a list item is an array section, its lower-bound must be zero. - llvm::APSInt Result; - if (OASE && OASE->getLowerBound() && - OASE->getLowerBound()->EvaluateAsInt(Result, Context) && Result != 0) { - Diag(OASE->getLowerBound()->getExprLoc(), - diag::err_omp_expected_array_sect_reduction_lb_not_zero) - << OASE->getLowerBound()->getSourceRange(); - continue; + auto *Base = OASE->getBase()->IgnoreParenImpCasts(); + while (auto *TempOASE = dyn_cast(Base)) + Base = TempOASE->getBase()->IgnoreParenImpCasts(); + while (auto *TempASE = dyn_cast(Base)) + Base = TempASE->getBase()->IgnoreParenImpCasts(); + DE = dyn_cast(Base); + if (DE) + VD = dyn_cast(DE->getDecl()); + if (!VD) { + Diag(Base->getExprLoc(), diag::err_omp_expected_base_var_name) + << 1 << Base->getSourceRange(); + continue; + } } // OpenMP [2.9.3.3, Restrictions, C/C++, p.3] @@ -6484,7 +6503,7 @@ OMPClause *Sema::ActOnOpenMPReductionClause( // Arrays may not appear in a reduction clause. if (Type.getNonReferenceType()->isArrayType()) { Diag(ELoc, diag::err_omp_reduction_type_array) << Type << ERange; - if (VD) { + if (!ASE && !OASE) { bool IsDecl = VD->isThisDeclarationADefinition(Context) == VarDecl::DeclarationOnly; Diag(VD->getLocation(), @@ -6499,7 +6518,7 @@ OMPClause *Sema::ActOnOpenMPReductionClause( if (Type.getNonReferenceType().isConstant(Context)) { Diag(ELoc, diag::err_omp_const_reduction_list_item) << getOpenMPClauseName(OMPC_reduction) << Type << ERange; - if (VD) { + if (!ASE && !OASE) { bool IsDecl = VD->isThisDeclarationADefinition(Context) == VarDecl::DeclarationOnly; Diag(VD->getLocation(), @@ -6511,7 +6530,7 @@ OMPClause *Sema::ActOnOpenMPReductionClause( // OpenMP [2.9.3.6, Restrictions, C/C++, p.4] // If a list-item is a reference type then it must bind to the same object // for all threads of the team. - if (VD) { + if (!ASE && !OASE) { VarDecl *VDDef = VD->getDefinition(); if (Type->isReferenceType() && VDDef) { DSARefChecker Check(DSAStack); @@ -6535,7 +6554,7 @@ OMPClause *Sema::ActOnOpenMPReductionClause( (getLangOpts().CPlusPlus && Type->isArithmeticType()))) { Diag(ELoc, diag::err_omp_clause_not_arithmetic_type_arg) << getLangOpts().CPlusPlus; - if (VD) { + if (!ASE && !OASE) { bool IsDecl = VD->isThisDeclarationADefinition(Context) == VarDecl::DeclarationOnly; Diag(VD->getLocation(), @@ -6547,7 +6566,7 @@ OMPClause *Sema::ActOnOpenMPReductionClause( if ((BOK == BO_OrAssign || BOK == BO_AndAssign || BOK == BO_XorAssign) && !getLangOpts().CPlusPlus && Type->isFloatingType()) { Diag(ELoc, diag::err_omp_clause_floating_type_arg); - if (VD) { + if (!ASE && !OASE) { bool IsDecl = VD->isThisDeclarationADefinition(Context) == VarDecl::DeclarationOnly; Diag(VD->getLocation(), @@ -6568,48 +6587,58 @@ OMPClause *Sema::ActOnOpenMPReductionClause( // but a list item can appear only once in the reduction clauses for that // directive. DSAStackTy::DSAVarData DVar; - if (VD) { - DVar = DSAStack->getTopDSA(VD, false); - if (DVar.CKind == OMPC_reduction) { - Diag(ELoc, diag::err_omp_once_referenced) - << getOpenMPClauseName(OMPC_reduction); - if (DVar.RefExpr) { - Diag(DVar.RefExpr->getExprLoc(), diag::note_omp_referenced); - } - } else if (DVar.CKind != OMPC_unknown) { - Diag(ELoc, diag::err_omp_wrong_dsa) - << getOpenMPClauseName(DVar.CKind) - << getOpenMPClauseName(OMPC_reduction); - ReportOriginalDSA(*this, DSAStack, VD, DVar); - continue; + DVar = DSAStack->getTopDSA(VD, false); + if (DVar.CKind == OMPC_reduction) { + Diag(ELoc, diag::err_omp_once_referenced) + << getOpenMPClauseName(OMPC_reduction); + if (DVar.RefExpr) { + Diag(DVar.RefExpr->getExprLoc(), diag::note_omp_referenced); } + } else if (DVar.CKind != OMPC_unknown) { + Diag(ELoc, diag::err_omp_wrong_dsa) + << getOpenMPClauseName(DVar.CKind) + << getOpenMPClauseName(OMPC_reduction); + ReportOriginalDSA(*this, DSAStack, VD, DVar); + continue; } // OpenMP [2.14.3.6, Restrictions, p.1] // A list item that appears in a reduction clause of a worksharing // construct must be shared in the parallel regions to which any of the // worksharing regions arising from the worksharing construct bind. - if (VD) { - OpenMPDirectiveKind CurrDir = DSAStack->getCurrentDirective(); - if (isOpenMPWorksharingDirective(CurrDir) && - !isOpenMPParallelDirective(CurrDir)) { - DVar = DSAStack->getImplicitDSA(VD, true); - if (DVar.CKind != OMPC_shared) { - Diag(ELoc, diag::err_omp_required_access) - << getOpenMPClauseName(OMPC_reduction) - << getOpenMPClauseName(OMPC_shared); - ReportOriginalDSA(*this, DSAStack, VD, DVar); - continue; - } + OpenMPDirectiveKind CurrDir = DSAStack->getCurrentDirective(); + if (isOpenMPWorksharingDirective(CurrDir) && + !isOpenMPParallelDirective(CurrDir)) { + DVar = DSAStack->getImplicitDSA(VD, true); + if (DVar.CKind != OMPC_shared) { + Diag(ELoc, diag::err_omp_required_access) + << getOpenMPClauseName(OMPC_reduction) + << getOpenMPClauseName(OMPC_shared); + ReportOriginalDSA(*this, DSAStack, VD, DVar); + continue; } } + Type = Type.getNonLValueExprType(Context).getUnqualifiedType(); - auto *LHSVD = - buildVarDecl(*this, ELoc, Type, ".reduction.lhs", - VD && VD->hasAttrs() ? &VD->getAttrs() : nullptr); - auto *RHSVD = - buildVarDecl(*this, ELoc, Type, VD ? VD->getName() : ".item.", - VD && VD->hasAttrs() ? &VD->getAttrs() : nullptr); + auto *LHSVD = buildVarDecl(*this, ELoc, Type, ".reduction.lhs", + VD->hasAttrs() ? &VD->getAttrs() : nullptr); + auto *RHSVD = buildVarDecl(*this, ELoc, Type, VD->getName(), + VD->hasAttrs() ? &VD->getAttrs() : nullptr); + auto PrivateTy = Type; + if (OASE) { + // For array sections only: + // Create pseudo array type for private copy. The size for this array will + // be generated during codegen. + // For array subscripts or single variables Private Ty is the same as Type + // (type of the variable or single array element). + PrivateTy = Context.getVariableArrayType( + Type, new (Context) OpaqueValueExpr(SourceLocation(), + Context.getSizeType(), VK_RValue), + ArrayType::Normal, /*IndexTypeQuals=*/0, SourceRange()); + } + // Private copy. + auto *PrivateVD = buildVarDecl(*this, ELoc, PrivateTy, VD->getName(), + VD->hasAttrs() ? &VD->getAttrs() : nullptr); // Add initializer for private variable. Expr *Init = nullptr; switch (BOK) { @@ -6718,9 +6747,8 @@ OMPClause *Sema::ActOnOpenMPReductionClause( if (Init) { AddInitializerToDecl(RHSVD, Init, /*DirectInit=*/false, /*TypeMayContainAuto=*/false); - } else { + } else ActOnUninitializedDecl(RHSVD, /*TypeMayContainAuto=*/false); - } if (!RHSVD->hasInit()) { Diag(ELoc, diag::err_omp_reduction_id_not_compatible) << Type << ReductionIdRange; @@ -6733,8 +6761,13 @@ OMPClause *Sema::ActOnOpenMPReductionClause( } continue; } + // Store initializer for single element in private copy. Will be used during + // codegen. + PrivateVD->setInit(RHSVD->getInit()); + PrivateVD->setInitStyle(RHSVD->getInitStyle()); auto *LHSDRE = buildDeclRefExpr(*this, LHSVD, Type, ELoc); auto *RHSDRE = buildDeclRefExpr(*this, RHSVD, Type, ELoc); + auto *PrivateDRE = buildDeclRefExpr(*this, PrivateVD, PrivateTy, ELoc); ExprResult ReductionOp = BuildBinOp(DSAStack->getCurScope(), ReductionId.getLocStart(), BOK, LHSDRE, RHSDRE); @@ -6756,9 +6789,9 @@ OMPClause *Sema::ActOnOpenMPReductionClause( if (ReductionOp.isInvalid()) continue; - if (VD) - DSAStack->addDSA(VD, DE, OMPC_reduction); + DSAStack->addDSA(VD, DE, OMPC_reduction); Vars.push_back(RefExpr); + Privates.push_back(PrivateDRE); LHSs.push_back(LHSDRE); RHSs.push_back(RHSDRE); ReductionOps.push_back(ReductionOp.get()); @@ -6769,8 +6802,8 @@ OMPClause *Sema::ActOnOpenMPReductionClause( return OMPReductionClause::Create( Context, StartLoc, LParenLoc, ColonLoc, EndLoc, Vars, - ReductionIdScopeSpec.getWithLocInContext(Context), ReductionId, LHSs, - RHSs, ReductionOps); + ReductionIdScopeSpec.getWithLocInContext(Context), ReductionId, Privates, + LHSs, RHSs, ReductionOps); } OMPClause *Sema::ActOnOpenMPLinearClause( diff --git a/lib/Serialization/ASTReaderStmt.cpp b/lib/Serialization/ASTReaderStmt.cpp index 4d55deeaa5..5f9f052292 100644 --- a/lib/Serialization/ASTReaderStmt.cpp +++ b/lib/Serialization/ASTReaderStmt.cpp @@ -1994,6 +1994,10 @@ void OMPClauseReader::VisitOMPReductionClause(OMPReductionClause *C) { Vars.push_back(Reader->Reader.ReadSubExpr()); C->setVarRefs(Vars); Vars.clear(); + for (unsigned i = 0; i != NumVars; ++i) + Vars.push_back(Reader->Reader.ReadSubExpr()); + C->setPrivates(Vars); + Vars.clear(); for (unsigned i = 0; i != NumVars; ++i) Vars.push_back(Reader->Reader.ReadSubExpr()); C->setLHSExprs(Vars); diff --git a/lib/Serialization/ASTWriterStmt.cpp b/lib/Serialization/ASTWriterStmt.cpp index 33cffaf115..7fce321791 100644 --- a/lib/Serialization/ASTWriterStmt.cpp +++ b/lib/Serialization/ASTWriterStmt.cpp @@ -1869,6 +1869,8 @@ void OMPClauseWriter::VisitOMPReductionClause(OMPReductionClause *C) { Writer->Writer.AddDeclarationNameInfo(C->getNameInfo(), Record); for (auto *VE : C->varlists()) Writer->Writer.AddStmt(VE); + for (auto *VE : C->privates()) + Writer->Writer.AddStmt(VE); for (auto *E : C->lhs_exprs()) Writer->Writer.AddStmt(E); for (auto *E : C->rhs_exprs()) diff --git a/test/OpenMP/for_reduction_codegen.cpp b/test/OpenMP/for_reduction_codegen.cpp index 6c853fe8d3..59d785480e 100644 --- a/test/OpenMP/for_reduction_codegen.cpp +++ b/test/OpenMP/for_reduction_codegen.cpp @@ -181,13 +181,17 @@ int main() { int vec[] = {1, 2}; S s_arr[] = {1, 2}; S &var = test; - S var1; + S var1, arrs[10][4]; #pragma omp parallel #pragma omp for reduction(+:t_var) reduction(&:var) reduction(&& : var1) reduction(min: t_var1) for (int i = 0; i < 2; ++i) { vec[i] = t_var; s_arr[i] = var; } + int arr[10][vec[1]]; +#pragma omp parallel for reduction(+:arr[1][:vec[1]]) reduction(&:arrs[1:vec[1]][1:2]) + for (int i = 0; i < 10; ++i) + ++arr[1][i]; return tmain(); #endif } @@ -196,6 +200,7 @@ int main() { // CHECK: [[TEST:%.+]] = alloca [[S_FLOAT_TY]], // CHECK: call {{.*}} [[S_FLOAT_TY_CONSTR:@.+]]([[S_FLOAT_TY]]* [[TEST]]) // CHECK: call void (%{{.+}}*, i{{[0-9]+}}, void (i{{[0-9]+}}*, i{{[0-9]+}}*, ...)*, ...) @__kmpc_fork_call(%{{.+}}* @{{.+}}, i{{[0-9]+}} 6, void (i{{[0-9]+}}*, i{{[0-9]+}}*, ...)* bitcast (void (i{{[0-9]+}}*, i{{[0-9]+}}*, float*, [[S_FLOAT_TY]]*, [[S_FLOAT_TY]]*, float*, [2 x i32]*, [2 x [[S_FLOAT_TY]]]*)* [[MAIN_MICROTASK:@.+]] to void +// CHECK: call void (%{{.+}}*, i{{[0-9]+}}, void (i{{[0-9]+}}*, i{{[0-9]+}}*, ...)*, ...) @__kmpc_fork_call(%{{.+}}* @{{.+}}, i{{[0-9]+}} 5, void (i{{[0-9]+}}*, i{{[0-9]+}}*, ...)* bitcast (void (i{{[0-9]+}}*, i{{[0-9]+}}*, i64, i64, i32*, [2 x i32]*, [10 x [4 x [[S_FLOAT_TY]]]]*)* [[MAIN_MICROTASK1:@.+]] to void // CHECK: = call {{.*}}i{{.+}} [[TMAIN_INT:@.+]]() // CHECK: call {{.*}} [[S_FLOAT_TY_DESTR:@.+]]([[S_FLOAT_TY]]* // CHECK: ret @@ -458,6 +463,213 @@ int main() { // CHECK: store float [[UP]], float* [[T_VAR1_LHS]], // CHECK: ret void +// CHECK: define internal void [[MAIN_MICROTASK1]](i{{[0-9]+}}* noalias [[GTID_ADDR:%.+]], i{{[0-9]+}}* noalias %{{.+}}, i64 %{{.+}}, i64 %{{.+}}, i32* nonnull %{{.+}}, [2 x i32]* dereferenceable(8) %{{.+}}, [10 x [4 x [[S_FLOAT_TY]]]]* dereferenceable(160) %{{.+}}) + +// Reduction list for runtime. +// CHECK: [[RED_LIST:%.+]] = alloca [4 x i8*], + +// CHECK: store i{{[0-9]+}}* [[GTID_ADDR]], i{{[0-9]+}}** [[GTID_ADDR_ADDR:%.+]], + +// CHECK: [[IDX1:%.+]] = mul nsw i64 1, %{{.+}} +// CHECK: [[LB1:%.+]] = getelementptr inbounds i32, i32* %{{.+}}, i64 [[IDX1]] +// CHECK: [[LB1_0:%.+]] = getelementptr inbounds i32, i32* [[LB1]], i64 0 +// CHECK: [[IDX1:%.+]] = mul nsw i64 1, %{{.+}} +// CHECK: [[UB1:%.+]] = getelementptr inbounds i32, i32* %{{.+}}, i64 [[IDX1]] +// CHECK: [[UB1_UP:%.+]] = getelementptr inbounds i32, i32* [[UB1]], i64 % +// CHECK: [[UB_CAST:%.+]] = ptrtoint i32* [[UB1_UP]] to i64 +// CHECK: [[LB_CAST:%.+]] = ptrtoint i32* [[LB1_0]] to i64 +// CHECK: [[DIFF:%.+]] = sub i64 [[UB_CAST]], [[LB_CAST]] +// CHECK: [[SIZE_1:%.+]] = sdiv exact i64 [[DIFF]], ptrtoint (i32* getelementptr (i32, i32* null, i32 1) to i64) +// CHECK: [[ARR_SIZE:%.+]] = add nuw i64 [[SIZE_1]], 1 +// CHECK: call i8* @llvm.stacksave() +// CHECK: [[ARR_PRIV:%.+]] = alloca i32, i64 [[ARR_SIZE]], + +// Check initialization of private copy. +// CHECK: [[END:%.+]] = getelementptr i32, i32* [[ARR_PRIV]], i64 [[ARR_SIZE]] +// CHECK: [[ISEMPTY:%.+]] = icmp eq i32* [[ARR_PRIV]], [[END]] +// CHECK: br i1 [[ISEMPTY]], +// CHECK: phi i32* +// CHECK: store i32 0, i32* % +// CHECK: [[DONE:%.+]] = icmp eq i32* %{{.+}}, [[END]] +// CHECK: br i1 [[DONE]], + +// CHECK: [[ARRS_PRIV:%.+]] = alloca [[S_FLOAT_TY]], i64 [[ARRS_SIZE:%.+]], + +// Check initialization of private copy. +// CHECK: [[END:%.+]] = getelementptr [[S_FLOAT_TY]], [[S_FLOAT_TY]]* [[ARRS_PRIV]], i64 [[ARRS_SIZE]] +// CHECK: [[ISEMPTY:%.+]] = icmp eq [[S_FLOAT_TY]]* [[ARRS_PRIV]], [[END]] +// CHECK: br i1 [[ISEMPTY]], +// CHECK: phi [[S_FLOAT_TY]]* +// CHECK: call void @_ZN1SIfEC1Ev([[S_FLOAT_TY]]* % +// CHECK: [[DONE:%.+]] = icmp eq [[S_FLOAT_TY]]* %{{.+}}, [[END]] +// CHECK: br i1 [[DONE]], + +// CHECK: [[GTID_REF:%.+]] = load i{{[0-9]+}}*, i{{[0-9]+}}** [[GTID_ADDR_ADDR]] +// CHECK: [[GTID:%.+]] = load i{{[0-9]+}}, i{{[0-9]+}}* [[GTID_REF]] +// CHECK: call void @__kmpc_for_static_init_4( +// Skip checks for internal operations. +// CHECK: call void @__kmpc_for_static_fini( + +// void *RedList[] = {[0], ..., [-1]}; + +// CHECK: [[ARR_PRIV_REF:%.+]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[RED_LIST]], i64 0, i64 0 +// CHECK: [[BITCAST:%.+]] = bitcast i32* [[ARR_PRIV]] to i8* +// CHECK: store i8* [[BITCAST]], i8** [[ARR_PRIV_REF]], +// CHECK: [[ARR_SIZE_REF:%.+]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[RED_LIST]], i64 0, i64 1 +// CHECK: [[BITCAST:%.+]] = inttoptr i64 [[ARR_SIZE]] to i8* +// CHECK: store i8* [[BITCAST]], i8** [[ARR_SIZE_REF]], +// CHECK: [[ARRS_PRIV_REF:%.+]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[RED_LIST]], i64 0, i64 2 +// CHECK: [[BITCAST:%.+]] = bitcast [[S_FLOAT_TY]]* [[ARRS_PRIV]] to i8* +// CHECK: store i8* [[BITCAST]], i8** [[ARRS_PRIV_REF]], +// CHECK: [[ARRS_SIZE_REF:%.+]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[RED_LIST]], i64 0, i64 3 +// CHECK: [[BITCAST:%.+]] = inttoptr i64 [[ARRS_SIZE]] to i8* +// CHECK: store i8* [[BITCAST]], i8** [[ARRS_SIZE_REF]], + +// res = __kmpc_reduce(, , , sizeof(RedList), RedList, reduce_func, &); + +// CHECK: [[GTID_REF:%.+]] = load i{{[0-9]+}}*, i{{[0-9]+}}** [[GTID_ADDR_ADDR]] +// CHECK: [[GTID:%.+]] = load i{{[0-9]+}}, i{{[0-9]+}}* [[GTID_REF]] +// CHECK: [[BITCAST:%.+]] = bitcast [4 x i8*]* [[RED_LIST]] to i8* +// CHECK: [[RES:%.+]] = call i32 @__kmpc_reduce_nowait(%{{.+}}* [[REDUCTION_LOC]], i32 [[GTID]], i32 2, i64 32, i8* [[BITCAST]], void (i8*, i8*)* [[REDUCTION_FUNC:@.+]], [8 x i32]* [[REDUCTION_LOCK]]) + +// switch(res) +// CHECK: switch i32 [[RES]], label %[[RED_DONE:.+]] [ +// CHECK: i32 1, label %[[CASE1:.+]] +// CHECK: i32 2, label %[[CASE2:.+]] +// CHECK: ] + +// case 1: +// CHECK: [[CASE1]] + +// arr[:] += arr_reduction[:]; +// CHECK: [[END:%.+]] = getelementptr i32, i32* [[LB1_0]], i64 [[ARR_SIZE]] +// CHECK: [[ISEMPTY:%.+]] = icmp eq i32* [[LB1_0]], [[END]] +// CHECK: br i1 [[ISEMPTY]], +// CHECK: phi i32* +// CHECK: [[ADD:%.+]] = add nsw i32 % +// CHECK: store i32 [[ADD]], i32* % +// CHECK: [[DONE:%.+]] = icmp eq i32* %{{.+}}, [[END]] +// CHECK: br i1 [[DONE]], + +// arrs[:] = var.operator &(arrs_reduction[:]); +// CHECK: [[END:%.+]] = getelementptr [[S_FLOAT_TY]], [[S_FLOAT_TY]]* [[ARRS_LB:%.+]], i64 [[ARRS_SIZE]] +// CHECK: [[ISEMPTY:%.+]] = icmp eq [[S_FLOAT_TY]]* [[ARRS_LB]], [[END]] +// CHECK: br i1 [[ISEMPTY]], +// CHECK: phi [[S_FLOAT_TY]]* +// CHECK: [[AND:%.+]] = call dereferenceable(4) [[S_FLOAT_TY]]* @_ZN1SIfEanERKS0_([[S_FLOAT_TY]]* %{{.+}}, [[S_FLOAT_TY]]* dereferenceable(4) %{{.+}}) +// CHECK: [[BITCAST:%.+]] = bitcast [[S_FLOAT_TY]]* [[AND]] to i8* +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* %{{.+}}, i8* [[BITCAST]], i64 4, i32 4, i1 false) +// CHECK: [[DONE:%.+]] = icmp eq [[S_FLOAT_TY]]* %{{.+}}, [[END]] +// CHECK: br i1 [[DONE]], + +// __kmpc_end_reduce(, , &); +// CHECK: call void @__kmpc_end_reduce_nowait(%{{.+}}* [[REDUCTION_LOC]], i32 [[GTID]], [8 x i32]* [[REDUCTION_LOCK]]) + +// break; +// CHECK: br label %[[RED_DONE]] + +// case 2: +// CHECK: [[CASE2]] + +// arr[:] += arr_reduction[:]; +// CHECK: [[END:%.+]] = getelementptr i32, i32* [[LB1_0]], i64 [[ARR_SIZE]] +// CHECK: [[ISEMPTY:%.+]] = icmp eq i32* [[LB1_0]], [[END]] +// CHECK: br i1 [[ISEMPTY]], +// CHECK: phi i32* +// CHECK: atomicrmw add i32* %{{.+}}, i32 %{{.+}} monotonic +// CHECK: [[DONE:%.+]] = icmp eq i32* %{{.+}}, [[END]] +// CHECK: br i1 [[DONE]], + +// arrs[:] = var.operator &(arrs_reduction[:]); +// CHECK: [[END:%.+]] = getelementptr [[S_FLOAT_TY]], [[S_FLOAT_TY]]* [[ARRS_LB:%.+]], i64 [[ARRS_SIZE]] +// CHECK: [[ISEMPTY:%.+]] = icmp eq [[S_FLOAT_TY]]* [[ARRS_LB]], [[END]] +// CHECK: br i1 [[ISEMPTY]], +// CHECK: phi [[S_FLOAT_TY]]* +// CHECK: call void @__kmpc_critical( +// CHECK: [[AND:%.+]] = call dereferenceable(4) [[S_FLOAT_TY]]* @_ZN1SIfEanERKS0_([[S_FLOAT_TY]]* %{{.+}}, [[S_FLOAT_TY]]* dereferenceable(4) %{{.+}}) +// CHECK: [[BITCAST:%.+]] = bitcast [[S_FLOAT_TY]]* [[AND]] to i8* +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* %{{.+}}, i8* [[BITCAST]], i64 4, i32 4, i1 false) +// CHECK: call void @__kmpc_end_critical( +// CHECK: [[DONE:%.+]] = icmp eq [[S_FLOAT_TY]]* %{{.+}}, [[END]] +// CHECK: br i1 [[DONE]], + +// break; +// CHECK: br label %[[RED_DONE]] +// CHECK: [[RED_DONE]] + +// Check destruction of private copy. +// CHECK: [[END:%.+]] = getelementptr inbounds [[S_FLOAT_TY]], [[S_FLOAT_TY]]* [[ARRS_PRIV]], i64 [[ARRS_SIZE]] +// CHECK: [[ISEMPTY:%.+]] = icmp eq [[S_FLOAT_TY]]* [[ARRS_PRIV]], [[END]] +// CHECK: br i1 [[ISEMPTY]], +// CHECK: phi [[S_FLOAT_TY]]* +// CHECK: call void @_ZN1SIfED1Ev([[S_FLOAT_TY]]* % +// CHECK: [[DONE:%.+]] = icmp eq [[S_FLOAT_TY]]* %{{.+}}, [[ARRS_PRIV]] +// CHECK: br i1 [[DONE]], +// CHECK: call void @llvm.stackrestore(i8* +// CHECK: [[GTID_REF:%.+]] = load i{{[0-9]+}}*, i{{[0-9]+}}** [[GTID_ADDR_ADDR]] +// CHECK: [[GTID:%.+]] = load i{{[0-9]+}}, i{{[0-9]+}}* [[GTID_REF]] +// CHECK: call void @__kmpc_barrier(%{{.+}}* [[IMPLICIT_BARRIER_LOC]], i{{[0-9]+}} [[GTID]]) + +// CHECK: ret void + +// void reduce_func(void *lhs[], void *rhs[]) { +// *(Type0*)lhs[0] = ReductionOperation0(*(Type0*)lhs[0], *(Type0*)rhs[0]); +// ... +// *(Type-1*)lhs[-1] = ReductionOperation-1(*(Type-1*)lhs[-1], +// *(Type-1*)rhs[-1]); +// } +// CHECK: define internal void [[REDUCTION_FUNC]](i8*, i8*) +// arr_rhs = (int*)rhs[0]; +// CHECK: [[ARR_RHS_REF:%.+]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[RED_LIST_RHS:%.+]], i64 0, i64 0 +// CHECK: [[ARR_RHS_VOID:%.+]] = load i8*, i8** [[ARR_RHS_REF]], +// CHECK: [[ARR_RHS:%.+]] = bitcast i8* [[ARR_RHS_VOID]] to i32* +// arr_lhs = (int*)lhs[0]; +// CHECK: [[ARR_LHS_REF:%.+]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[RED_LIST_LHS:%.+]], i64 0, i64 0 +// CHECK: [[ARR_LHS_VOID:%.+]] = load i8*, i8** [[ARR_LHS_REF]], +// CHECK: [[ARR_LHS:%.+]] = bitcast i8* [[ARR_LHS_VOID]] to i32* + +// arr_size = (size_t)lhs[1]; +// CHECK: [[ARR_SIZE_REF:%.+]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[RED_LIST_LHS]], i64 0, i64 1 +// CHECK: [[ARR_SIZE_VOID:%.+]] = load i8*, i8** [[ARR_SIZE_REF]], +// CHECK: [[ARR_SIZE:%.+]] = ptrtoint i8* [[ARR_SIZE_VOID]] to i64 + +// arrs_rhs = (S*)rhs[2]; +// CHECK: [[ARRS_RHS_REF:%.+]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[RED_LIST_RHS]], i64 0, i64 2 +// CHECK: [[ARRS_RHS_VOID:%.+]] = load i8*, i8** [[ARRS_RHS_REF]], +// CHECK: [[ARRS_RHS:%.+]] = bitcast i8* [[ARRS_RHS_VOID]] to [[S_FLOAT_TY]]* +// arrs_lhs = (S*)lhs[2]; +// CHECK: [[ARRS_LHS_REF:%.+]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[RED_LIST_LHS]], i64 0, i64 2 +// CHECK: [[ARRS_LHS_VOID:%.+]] = load i8*, i8** [[ARRS_LHS_REF]], +// CHECK: [[ARRS_LHS:%.+]] = bitcast i8* [[ARRS_LHS_VOID]] to [[S_FLOAT_TY]]* + +// arrs_size = (size_t)lhs[3]; +// CHECK: [[ARRS_SIZE_REF:%.+]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[RED_LIST_LHS]], i64 0, i64 3 +// CHECK: [[ARRS_SIZE_VOID:%.+]] = load i8*, i8** [[ARRS_SIZE_REF]], +// CHECK: [[ARRS_SIZE:%.+]] = ptrtoint i8* [[ARRS_SIZE_VOID]] to i64 + +// arr_lhs[:] += arr_rhs[:]; +// CHECK: [[END:%.+]] = getelementptr i32, i32* [[ARR_LHS]], i64 [[ARR_SIZE]] +// CHECK: [[ISEMPTY:%.+]] = icmp eq i32* [[ARR_LHS]], [[END]] +// CHECK: br i1 [[ISEMPTY]], +// CHECK: phi i32* +// CHECK: [[ADD:%.+]] = add nsw i32 % +// CHECK: store i32 [[ADD]], i32* % +// CHECK: [[DONE:%.+]] = icmp eq i32* %{{.+}}, [[END]] +// CHECK: br i1 [[DONE]], + +// arrs_lhs = arrs_lhs.operator &(arrs_rhs); +// CHECK: [[END:%.+]] = getelementptr [[S_FLOAT_TY]], [[S_FLOAT_TY]]* [[ARRS_LB:%.+]], i64 [[ARRS_SIZE]] +// CHECK: [[ISEMPTY:%.+]] = icmp eq [[S_FLOAT_TY]]* [[ARRS_LB]], [[END]] +// CHECK: br i1 [[ISEMPTY]], +// CHECK: phi [[S_FLOAT_TY]]* +// CHECK: [[AND:%.+]] = call dereferenceable(4) [[S_FLOAT_TY]]* @_ZN1SIfEanERKS0_([[S_FLOAT_TY]]* %{{.+}}, [[S_FLOAT_TY]]* dereferenceable(4) %{{.+}}) +// CHECK: [[BITCAST:%.+]] = bitcast [[S_FLOAT_TY]]* [[AND]] to i8* +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* %{{.+}}, i8* [[BITCAST]], i64 4, i32 4, i1 false) +// CHECK: [[DONE:%.+]] = icmp eq [[S_FLOAT_TY]]* %{{.+}}, [[END]] +// CHECK: br i1 [[DONE]], + +// CHECK: ret void + // CHECK: define {{.*}} i{{[0-9]+}} [[TMAIN_INT]]() // CHECK: [[TEST:%.+]] = alloca [[S_INT_TY]], // CHECK: call {{.*}} [[S_INT_TY_CONSTR:@.+]]([[S_INT_TY]]* [[TEST]]) diff --git a/test/OpenMP/for_reduction_messages.cpp b/test/OpenMP/for_reduction_messages.cpp index 48a0448656..05368f1a75 100644 --- a/test/OpenMP/for_reduction_messages.cpp +++ b/test/OpenMP/for_reduction_messages.cpp @@ -64,6 +64,8 @@ public: S3 h, k; #pragma omp threadprivate(h) // expected-note 2 {{defined as threadprivate or thread local}} +char *get(); + template // expected-note {{declared here}} T tmain(T argc) { const T d = T(); // expected-note 4 {{'d' defined here}} @@ -196,6 +198,14 @@ T tmain(T argc) { #pragma omp for reduction(+ : fl) // expected-error 2 {{reduction variable must be shared}} for (int i = 0; i < 10; ++i) foo(); +#pragma omp parallel private(qa) // expected-note 2 {{defined as private}} +#pragma omp for reduction(+ : qa[1], get()[0]) // expected-error 2 {{reduction variable must be shared}} expected-error {{expected variable name as a base of the array subscript}} + for (int i = 0; i < 10; ++i) + foo(); +#pragma omp parallel shared(qa) +#pragma omp for reduction(+ : qa[1], qa[0]) // expected-error 2 {{variable can appear only once in OpenMP 'reduction' clause}} expected-note 2 {{previously referenced here}} + for (int i = 0; i < 10; ++i) + foo(); #pragma omp parallel reduction(* : fl) // expected-note 2 {{defined as reduction}} #pragma omp for reduction(+ : fl) // expected-error 2 {{reduction variable must be shared}} for (int i = 0; i < 10; ++i) @@ -353,6 +363,14 @@ int main(int argc, char **argv) { #pragma omp for reduction(+ : fl) // expected-error {{reduction variable must be shared}} for (int i = 0; i < 10; ++i) foo(); +#pragma omp parallel private(argv) // expected-note {{defined as private}} +#pragma omp for reduction(+ : argv[1], get()[0]) // expected-error {{reduction variable must be shared}} expected-error {{expected variable name as a base of the array subscript}} + for (int i = 0; i < 10; ++i) + foo(); +#pragma omp parallel shared(qa) +#pragma omp for reduction(+ : qa[1], qa[0]) // expected-error {{variable can appear only once in OpenMP 'reduction' clause}} expected-note {{previously referenced here}} + for (int i = 0; i < 10; ++i) + foo(); #pragma omp parallel reduction(* : fl) // expected-note {{defined as reduction}} #pragma omp for reduction(+ : fl) // expected-error {{reduction variable must be shared}} for (int i = 0; i < 10; ++i) diff --git a/tools/libclang/CIndex.cpp b/tools/libclang/CIndex.cpp index 9e66d0c653..96923dc7de 100644 --- a/tools/libclang/CIndex.cpp +++ b/tools/libclang/CIndex.cpp @@ -2106,6 +2106,9 @@ void OMPClauseEnqueue::VisitOMPSharedClause(const OMPSharedClause *C) { } void OMPClauseEnqueue::VisitOMPReductionClause(const OMPReductionClause *C) { VisitOMPClauseList(C); + for (auto *E : C->privates()) { + Visitor->AddStmt(E); + } for (auto *E : C->lhs_exprs()) { Visitor->AddStmt(E); } -- 2.40.0