From 033a0e463c5c0fefec21b8f89370dfded35a2a4f Mon Sep 17 00:00:00 2001 From: Carlo Bertolli Date: Mon, 24 Apr 2017 19:50:35 +0000 Subject: [PATCH] Revert r301223 git-svn-id: https://llvm.org/svn/llvm-project/cfe/trunk@301233 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/CodeGen/CGOpenMPRuntime.cpp | 22 +- lib/CodeGen/CGOpenMPRuntime.h | 40 +- lib/CodeGen/CGStmtOpenMP.cpp | 436 +--- lib/CodeGen/CodeGenFunction.h | 96 +- .../distribute_parallel_for_codegen.cpp | 2260 ----------------- ...bute_parallel_for_firstprivate_codegen.cpp | 619 ----- .../distribute_parallel_for_if_codegen.cpp | 192 -- ...ibute_parallel_for_lastprivate_codegen.cpp | 653 ----- ...ibute_parallel_for_num_threads_codegen.cpp | 121 - ...istribute_parallel_for_private_codegen.cpp | 297 --- ...tribute_parallel_for_proc_bind_codegen.cpp | 93 - 11 files changed, 123 insertions(+), 4706 deletions(-) delete mode 100644 test/OpenMP/distribute_parallel_for_codegen.cpp delete mode 100644 test/OpenMP/distribute_parallel_for_firstprivate_codegen.cpp delete mode 100644 test/OpenMP/distribute_parallel_for_if_codegen.cpp delete mode 100644 test/OpenMP/distribute_parallel_for_lastprivate_codegen.cpp delete mode 100644 test/OpenMP/distribute_parallel_for_num_threads_codegen.cpp delete mode 100644 test/OpenMP/distribute_parallel_for_private_codegen.cpp delete mode 100644 test/OpenMP/distribute_parallel_for_proc_bind_codegen.cpp diff --git a/lib/CodeGen/CGOpenMPRuntime.cpp b/lib/CodeGen/CGOpenMPRuntime.cpp index d1a706b882..874b6a69e5 100644 --- a/lib/CodeGen/CGOpenMPRuntime.cpp +++ b/lib/CodeGen/CGOpenMPRuntime.cpp @@ -2466,14 +2466,16 @@ static int addMonoNonMonoModifier(OpenMPSchedType Schedule, return Schedule | Modifier; } -void CGOpenMPRuntime::emitForDispatchInit( - CodeGenFunction &CGF, SourceLocation Loc, - const OpenMPScheduleTy &ScheduleKind, unsigned IVSize, bool IVSigned, - bool Ordered, const DispatchRTInput &DispatchValues) { +void CGOpenMPRuntime::emitForDispatchInit(CodeGenFunction &CGF, + SourceLocation Loc, + const OpenMPScheduleTy &ScheduleKind, + unsigned IVSize, bool IVSigned, + bool Ordered, llvm::Value *UB, + llvm::Value *Chunk) { if (!CGF.HaveInsertPoint()) return; - OpenMPSchedType Schedule = getRuntimeSchedule( - ScheduleKind.Schedule, DispatchValues.Chunk != nullptr, Ordered); + OpenMPSchedType Schedule = + getRuntimeSchedule(ScheduleKind.Schedule, Chunk != nullptr, Ordered); assert(Ordered || (Schedule != OMP_sch_static && Schedule != OMP_sch_static_chunked && Schedule != OMP_ord_static && Schedule != OMP_ord_static_chunked && @@ -2484,14 +2486,14 @@ void CGOpenMPRuntime::emitForDispatchInit( // kmp_int[32|64] stride, kmp_int[32|64] chunk); // If the Chunk was not specified in the clause - use default value 1. - llvm::Value *Chunk = DispatchValues.Chunk ? DispatchValues.Chunk - : CGF.Builder.getIntN(IVSize, 1); + if (Chunk == nullptr) + Chunk = CGF.Builder.getIntN(IVSize, 1); llvm::Value *Args[] = { emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc), CGF.Builder.getInt32(addMonoNonMonoModifier( Schedule, ScheduleKind.M1, ScheduleKind.M2)), // Schedule type - DispatchValues.LB, // Lower - DispatchValues.UB, // Upper + CGF.Builder.getIntN(IVSize, 0), // Lower + UB, // Upper CGF.Builder.getIntN(IVSize, 1), // Stride Chunk // Chunk }; diff --git a/lib/CodeGen/CGOpenMPRuntime.h b/lib/CodeGen/CGOpenMPRuntime.h index 6f460f1217..7901a6b7a8 100644 --- a/lib/CodeGen/CGOpenMPRuntime.h +++ b/lib/CodeGen/CGOpenMPRuntime.h @@ -672,50 +672,16 @@ public: /// virtual bool isDynamic(OpenMPScheduleClauseKind ScheduleKind) const; - /// struct with the values to be passed to the dispatch runtime function - struct DispatchRTInput { - /// Loop lower bound - llvm::Value *LB = nullptr; - /// Loop upper bound - llvm::Value *UB = nullptr; - /// Chunk size specified using 'schedule' clause (nullptr if chunk - /// was not specified) - llvm::Value *Chunk = nullptr; - DispatchRTInput() = default; - DispatchRTInput(llvm::Value *LB, llvm::Value *UB, llvm::Value *Chunk) - : LB(LB), UB(UB), Chunk(Chunk) {} - }; - - /// Call the appropriate runtime routine to initialize it before start - /// of loop. - - /// This is used for non static scheduled types and when the ordered - /// clause is present on the loop construct. - /// Depending on the loop schedule, it is necessary to call some runtime - /// routine before start of the OpenMP loop to get the loop upper / lower - /// bounds \a LB and \a UB and stride \a ST. - /// - /// \param CGF Reference to current CodeGenFunction. - /// \param Loc Clang source location. - /// \param ScheduleKind Schedule kind, specified by the 'schedule' clause. - /// \param IVSize Size of the iteration variable in bits. - /// \param IVSigned Sign of the interation variable. - /// \param Ordered true if loop is ordered, false otherwise. - /// \param DispatchValues struct containing llvm values for lower bound, upper - /// bound, and chunk expression. - /// For the default (nullptr) value, the chunk 1 will be used. - /// virtual void emitForDispatchInit(CodeGenFunction &CGF, SourceLocation Loc, const OpenMPScheduleTy &ScheduleKind, unsigned IVSize, bool IVSigned, bool Ordered, - const DispatchRTInput &DispatchValues); + llvm::Value *UB, + llvm::Value *Chunk = nullptr); /// \brief Call the appropriate runtime routine to initialize it before start /// of loop. /// - /// This is used only in case of static schedule, when the user did not - /// specify a ordered clause on the loop construct. - /// Depending on the loop schedule, it is necessary to call some runtime + /// Depending on the loop schedule, it is nesessary to call some runtime /// routine before start of the OpenMP loop to get the loop upper / lower /// bounds \a LB and \a UB and stride \a ST. /// diff --git a/lib/CodeGen/CGStmtOpenMP.cpp b/lib/CodeGen/CGStmtOpenMP.cpp index f738dd0750..22269e42c7 100644 --- a/lib/CodeGen/CGStmtOpenMP.cpp +++ b/lib/CodeGen/CGStmtOpenMP.cpp @@ -87,8 +87,7 @@ public: class OMPParallelScope final : public OMPLexicalScope { bool EmitPreInitStmt(const OMPExecutableDirective &S) { OpenMPDirectiveKind Kind = S.getDirectiveKind(); - return !(isOpenMPTargetExecutionDirective(Kind) || - isOpenMPLoopBoundSharingDirective(Kind)) && + return !isOpenMPTargetExecutionDirective(Kind) && isOpenMPParallelDirective(Kind); } @@ -1250,20 +1249,10 @@ static void emitPostUpdateForReductionClause( CGF.EmitBlock(DoneBB, /*IsFinished=*/true); } -namespace { -/// Codegen lambda for appending distribute lower and upper bounds to outlined -/// parallel function. This is necessary for combined constructs such as -/// 'distribute parallel for' -typedef llvm::function_ref &)> - CodeGenBoundParametersTy; -} // anonymous namespace - -static void emitCommonOMPParallelDirective( - CodeGenFunction &CGF, const OMPExecutableDirective &S, - OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen, - const CodeGenBoundParametersTy &CodeGenBoundParameters) { +static void emitCommonOMPParallelDirective(CodeGenFunction &CGF, + const OMPExecutableDirective &S, + OpenMPDirectiveKind InnermostKind, + const RegionCodeGenTy &CodeGen) { const CapturedStmt *CS = S.getCapturedStmt(OMPD_parallel); auto OutlinedFn = CGF.CGM.getOpenMPRuntime().emitParallelOutlinedFunction( S, *CS->getCapturedDecl()->param_begin(), InnermostKind, CodeGen); @@ -1290,20 +1279,11 @@ static void emitCommonOMPParallelDirective( OMPParallelScope Scope(CGF, S); llvm::SmallVector CapturedVars; - // Combining 'distribute' with 'for' requires sharing each 'distribute' chunk - // lower and upper bounds with the pragma 'for' chunking mechanism. - // The following lambda takes care of appending the lower and upper bound - // parameters when necessary - CodeGenBoundParameters(CGF, S, CapturedVars); CGF.GenerateOpenMPCapturedVars(*CS, CapturedVars); CGF.CGM.getOpenMPRuntime().emitParallelCall(CGF, S.getLocStart(), OutlinedFn, CapturedVars, IfCond); } -static void emitEmptyBoundParameters(CodeGenFunction &, - const OMPExecutableDirective &, - llvm::SmallVectorImpl &) {} - void CodeGenFunction::EmitOMPParallelDirective(const OMPParallelDirective &S) { // Emit parallel region as a standalone region. auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) { @@ -1324,8 +1304,7 @@ void CodeGenFunction::EmitOMPParallelDirective(const OMPParallelDirective &S) { CGF.EmitStmt(cast(S.getAssociatedStmt())->getCapturedStmt()); CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_parallel); }; - emitCommonOMPParallelDirective(*this, S, OMPD_parallel, CodeGen, - emitEmptyBoundParameters); + emitCommonOMPParallelDirective(*this, S, OMPD_parallel, CodeGen); emitPostUpdateForReductionClause( *this, S, [](CodeGenFunction &) -> llvm::Value * { return nullptr; }); } @@ -1670,13 +1649,6 @@ void CodeGenFunction::EmitOMPSimdFinal( EmitBlock(DoneBB, /*IsFinished=*/true); } -static void emitOMPLoopBodyWithStopPoint(CodeGenFunction &CGF, - const OMPLoopDirective &S, - CodeGenFunction::JumpDest LoopExit) { - CGF.EmitOMPLoopBody(S, LoopExit); - CGF.EmitStopPoint(&S); -}; - void CodeGenFunction::EmitOMPSimdDirective(const OMPSimdDirective &S) { auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) { OMPLoopScope PreInitScope(CGF, S); @@ -1759,12 +1731,9 @@ void CodeGenFunction::EmitOMPSimdDirective(const OMPSimdDirective &S) { CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_simd, CodeGen); } -void CodeGenFunction::EmitOMPOuterLoop( - bool DynamicOrOrdered, bool IsMonotonic, const OMPLoopDirective &S, - CodeGenFunction::OMPPrivateScope &LoopScope, - const CodeGenFunction::OMPLoopArguments &LoopArgs, - const CodeGenFunction::CodeGenLoopTy &CodeGenLoop, - const CodeGenFunction::CodeGenOrderedTy &CodeGenOrdered) { +void CodeGenFunction::EmitOMPOuterLoop(bool DynamicOrOrdered, bool IsMonotonic, + const OMPLoopDirective &S, OMPPrivateScope &LoopScope, bool Ordered, + Address LB, Address UB, Address ST, Address IL, llvm::Value *Chunk) { auto &RT = CGM.getOpenMPRuntime(); const Expr *IVExpr = S.getIterationVariable(); @@ -1782,18 +1751,15 @@ void CodeGenFunction::EmitOMPOuterLoop( llvm::Value *BoolCondVal = nullptr; if (!DynamicOrOrdered) { - // UB = min(UB, GlobalUB) or - // UB = min(UB, PrevUB) for combined loop sharing constructs (e.g. - // 'distribute parallel for') - EmitIgnoredExpr(LoopArgs.EUB); + // UB = min(UB, GlobalUB) + EmitIgnoredExpr(S.getEnsureUpperBound()); // IV = LB - EmitIgnoredExpr(LoopArgs.Init); + EmitIgnoredExpr(S.getInit()); // IV < UB - BoolCondVal = EvaluateExprAsBool(LoopArgs.Cond); + BoolCondVal = EvaluateExprAsBool(S.getCond()); } else { - BoolCondVal = - RT.emitForNext(*this, S.getLocStart(), IVSize, IVSigned, LoopArgs.IL, - LoopArgs.LB, LoopArgs.UB, LoopArgs.ST); + BoolCondVal = RT.emitForNext(*this, S.getLocStart(), IVSize, IVSigned, IL, + LB, UB, ST); } // If there are any cleanups between here and the loop-exit scope, @@ -1813,7 +1779,7 @@ void CodeGenFunction::EmitOMPOuterLoop( // Emit "IV = LB" (in case of static schedule, we have already calculated new // LB for loop condition and emitted it above). if (DynamicOrOrdered) - EmitIgnoredExpr(LoopArgs.Init); + EmitIgnoredExpr(S.getInit()); // Create a block for the increment. auto Continue = getJumpDestInCurrentScope("omp.dispatch.inc"); @@ -1827,27 +1793,24 @@ void CodeGenFunction::EmitOMPOuterLoop( EmitOMPSimdInit(S, IsMonotonic); SourceLocation Loc = S.getLocStart(); - - // when 'distribute' is not combined with a 'for': - // while (idx <= UB) { BODY; ++idx; } - // when 'distribute' is combined with a 'for' - // (e.g. 'distribute parallel for') - // while (idx <= UB) { ; idx += ST; } - EmitOMPInnerLoop( - S, LoopScope.requiresCleanups(), LoopArgs.Cond, LoopArgs.IncExpr, - [&S, LoopExit, &CodeGenLoop](CodeGenFunction &CGF) { - CodeGenLoop(CGF, S, LoopExit); - }, - [IVSize, IVSigned, Loc, &CodeGenOrdered](CodeGenFunction &CGF) { - CodeGenOrdered(CGF, Loc, IVSize, IVSigned); - }); + EmitOMPInnerLoop(S, LoopScope.requiresCleanups(), S.getCond(), S.getInc(), + [&S, LoopExit](CodeGenFunction &CGF) { + CGF.EmitOMPLoopBody(S, LoopExit); + CGF.EmitStopPoint(&S); + }, + [Ordered, IVSize, IVSigned, Loc](CodeGenFunction &CGF) { + if (Ordered) { + CGF.CGM.getOpenMPRuntime().emitForOrderedIterationEnd( + CGF, Loc, IVSize, IVSigned); + } + }); EmitBlock(Continue.getBlock()); BreakContinueStack.pop_back(); if (!DynamicOrOrdered) { // Emit "LB = LB + Stride", "UB = UB + Stride". - EmitIgnoredExpr(LoopArgs.NextLB); - EmitIgnoredExpr(LoopArgs.NextUB); + EmitIgnoredExpr(S.getNextLowerBound()); + EmitIgnoredExpr(S.getNextUpperBound()); } EmitBranch(CondBlock); @@ -1866,8 +1829,7 @@ void CodeGenFunction::EmitOMPOuterLoop( void CodeGenFunction::EmitOMPForOuterLoop( const OpenMPScheduleTy &ScheduleKind, bool IsMonotonic, const OMPLoopDirective &S, OMPPrivateScope &LoopScope, bool Ordered, - const OMPLoopArguments &LoopArgs, - const CodeGenDispatchBoundsTy &CGDispatchBounds) { + Address LB, Address UB, Address ST, Address IL, llvm::Value *Chunk) { auto &RT = CGM.getOpenMPRuntime(); // Dynamic scheduling of the outer loop (dynamic, guided, auto, runtime). @@ -1876,7 +1838,7 @@ void CodeGenFunction::EmitOMPForOuterLoop( assert((Ordered || !RT.isStaticNonchunked(ScheduleKind.Schedule, - LoopArgs.Chunk != nullptr)) && + /*Chunked=*/Chunk != nullptr)) && "static non-chunked schedule does not need outer loop"); // Emit outer loop. @@ -1934,46 +1896,22 @@ void CodeGenFunction::EmitOMPForOuterLoop( const bool IVSigned = IVExpr->getType()->hasSignedIntegerRepresentation(); if (DynamicOrOrdered) { - auto DispatchBounds = CGDispatchBounds(*this, S, LoopArgs.LB, LoopArgs.UB); - llvm::Value *LBVal = DispatchBounds.first; - llvm::Value *UBVal = DispatchBounds.second; - CGOpenMPRuntime::DispatchRTInput DipatchRTInputValues = {LBVal, UBVal, - LoopArgs.Chunk}; + llvm::Value *UBVal = EmitScalarExpr(S.getLastIteration()); RT.emitForDispatchInit(*this, S.getLocStart(), ScheduleKind, IVSize, - IVSigned, Ordered, DipatchRTInputValues); + IVSigned, Ordered, UBVal, Chunk); } else { RT.emitForStaticInit(*this, S.getLocStart(), ScheduleKind, IVSize, IVSigned, - Ordered, LoopArgs.IL, LoopArgs.LB, LoopArgs.UB, - LoopArgs.ST, LoopArgs.Chunk); + Ordered, IL, LB, UB, ST, Chunk); } - auto &&CodeGenOrdered = [Ordered](CodeGenFunction &CGF, SourceLocation Loc, - const unsigned IVSize, - const bool IVSigned) { - if (Ordered) { - CGF.CGM.getOpenMPRuntime().emitForOrderedIterationEnd(CGF, Loc, IVSize, - IVSigned); - } - }; - - OMPLoopArguments OuterLoopArgs(LoopArgs.LB, LoopArgs.UB, LoopArgs.ST, - LoopArgs.IL, LoopArgs.Chunk, LoopArgs.EUB); - OuterLoopArgs.IncExpr = S.getInc(); - OuterLoopArgs.Init = S.getInit(); - OuterLoopArgs.Cond = S.getCond(); - OuterLoopArgs.NextLB = S.getNextLowerBound(); - OuterLoopArgs.NextUB = S.getNextUpperBound(); - EmitOMPOuterLoop(DynamicOrOrdered, IsMonotonic, S, LoopScope, OuterLoopArgs, - emitOMPLoopBodyWithStopPoint, CodeGenOrdered); + EmitOMPOuterLoop(DynamicOrOrdered, IsMonotonic, S, LoopScope, Ordered, LB, UB, + ST, IL, Chunk); } -static void emitEmptyOrdered(CodeGenFunction &, SourceLocation Loc, - const unsigned IVSize, const bool IVSigned) {} - void CodeGenFunction::EmitOMPDistributeOuterLoop( - OpenMPDistScheduleClauseKind ScheduleKind, const OMPLoopDirective &S, - OMPPrivateScope &LoopScope, const OMPLoopArguments &LoopArgs, - const CodeGenLoopTy &CodeGenLoopContent) { + OpenMPDistScheduleClauseKind ScheduleKind, + const OMPDistributeDirective &S, OMPPrivateScope &LoopScope, + Address LB, Address UB, Address ST, Address IL, llvm::Value *Chunk) { auto &RT = CGM.getOpenMPRuntime(); @@ -1986,159 +1924,26 @@ void CodeGenFunction::EmitOMPDistributeOuterLoop( const unsigned IVSize = getContext().getTypeSize(IVExpr->getType()); const bool IVSigned = IVExpr->getType()->hasSignedIntegerRepresentation(); - RT.emitDistributeStaticInit(*this, S.getLocStart(), ScheduleKind, IVSize, - IVSigned, /* Ordered = */ false, LoopArgs.IL, - LoopArgs.LB, LoopArgs.UB, LoopArgs.ST, - LoopArgs.Chunk); + RT.emitDistributeStaticInit(*this, S.getLocStart(), ScheduleKind, + IVSize, IVSigned, /* Ordered = */ false, + IL, LB, UB, ST, Chunk); - // for combined 'distribute' and 'for' the increment expression of distribute - // is store in DistInc. For 'distribute' alone, it is in Inc. - Expr *IncExpr; - if (isOpenMPLoopBoundSharingDirective(S.getDirectiveKind())) - IncExpr = S.getDistInc(); - else - IncExpr = S.getInc(); - - // this routine is shared by 'omp distribute parallel for' and - // 'omp distribute': select the right EUB expression depending on the - // directive - OMPLoopArguments OuterLoopArgs; - OuterLoopArgs.LB = LoopArgs.LB; - OuterLoopArgs.UB = LoopArgs.UB; - OuterLoopArgs.ST = LoopArgs.ST; - OuterLoopArgs.IL = LoopArgs.IL; - OuterLoopArgs.Chunk = LoopArgs.Chunk; - OuterLoopArgs.EUB = isOpenMPLoopBoundSharingDirective(S.getDirectiveKind()) - ? S.getCombinedEnsureUpperBound() - : S.getEnsureUpperBound(); - OuterLoopArgs.IncExpr = IncExpr; - OuterLoopArgs.Init = isOpenMPLoopBoundSharingDirective(S.getDirectiveKind()) - ? S.getCombinedInit() - : S.getInit(); - OuterLoopArgs.Cond = isOpenMPLoopBoundSharingDirective(S.getDirectiveKind()) - ? S.getCombinedCond() - : S.getCond(); - OuterLoopArgs.NextLB = isOpenMPLoopBoundSharingDirective(S.getDirectiveKind()) - ? S.getCombinedNextLowerBound() - : S.getNextLowerBound(); - OuterLoopArgs.NextUB = isOpenMPLoopBoundSharingDirective(S.getDirectiveKind()) - ? S.getCombinedNextUpperBound() - : S.getNextUpperBound(); - - EmitOMPOuterLoop(/* DynamicOrOrdered = */ false, /* IsMonotonic = */ false, S, - LoopScope, OuterLoopArgs, CodeGenLoopContent, - emitEmptyOrdered); -} - -/// Emit a helper variable and return corresponding lvalue. -static LValue EmitOMPHelperVar(CodeGenFunction &CGF, - const DeclRefExpr *Helper) { - auto VDecl = cast(Helper->getDecl()); - CGF.EmitVarDecl(*VDecl); - return CGF.EmitLValue(Helper); -} - -static std::pair -emitDistributeParallelForInnerBounds(CodeGenFunction &CGF, - const OMPExecutableDirective &S) { - const OMPLoopDirective &LS = cast(S); - LValue LB = - EmitOMPHelperVar(CGF, cast(LS.getLowerBoundVariable())); - LValue UB = - EmitOMPHelperVar(CGF, cast(LS.getUpperBoundVariable())); - - // When composing 'distribute' with 'for' (e.g. as in 'distribute - // parallel for') we need to use the 'distribute' - // chunk lower and upper bounds rather than the whole loop iteration - // space. These are parameters to the outlined function for 'parallel' - // and we copy the bounds of the previous schedule into the - // the current ones. - LValue PrevLB = CGF.EmitLValue(LS.getPrevLowerBoundVariable()); - LValue PrevUB = CGF.EmitLValue(LS.getPrevUpperBoundVariable()); - llvm::Value *PrevLBVal = CGF.EmitLoadOfScalar(PrevLB, SourceLocation()); - PrevLBVal = CGF.EmitScalarConversion( - PrevLBVal, LS.getPrevLowerBoundVariable()->getType(), - LS.getIterationVariable()->getType(), SourceLocation()); - llvm::Value *PrevUBVal = CGF.EmitLoadOfScalar(PrevUB, SourceLocation()); - PrevUBVal = CGF.EmitScalarConversion( - PrevUBVal, LS.getPrevUpperBoundVariable()->getType(), - LS.getIterationVariable()->getType(), SourceLocation()); - - CGF.EmitStoreOfScalar(PrevLBVal, LB); - CGF.EmitStoreOfScalar(PrevUBVal, UB); - - return {LB, UB}; -} - -/// if the 'for' loop has a dispatch schedule (e.g. dynamic, guided) then -/// we need to use the LB and UB expressions generated by the worksharing -/// code generation support, whereas in non combined situations we would -/// just emit 0 and the LastIteration expression -/// This function is necessary due to the difference of the LB and UB -/// types for the RT emission routines for 'for_static_init' and -/// 'for_dispatch_init' -static std::pair -emitDistributeParallelForDispatchBounds(CodeGenFunction &CGF, - const OMPExecutableDirective &S, - Address LB, Address UB) { - const OMPLoopDirective &LS = cast(S); - const Expr *IVExpr = LS.getIterationVariable(); - // when implementing a dynamic schedule for a 'for' combined with a - // 'distribute' (e.g. 'distribute parallel for'), the 'for' loop - // is not normalized as each team only executes its own assigned - // distribute chunk - QualType IteratorTy = IVExpr->getType(); - llvm::Value *LBVal = CGF.EmitLoadOfScalar(LB, /*Volatile=*/false, IteratorTy, - SourceLocation()); - llvm::Value *UBVal = CGF.EmitLoadOfScalar(UB, /*Volatile=*/false, IteratorTy, - SourceLocation()); - return {LBVal, UBVal}; -}; - -static void emitDistributeParallelForDistributeInnerBoundParams( - CodeGenFunction &CGF, const OMPExecutableDirective &S, - llvm::SmallVectorImpl &CapturedVars) { - const auto &Dir = cast(S); - LValue LB = - CGF.EmitLValue(cast(Dir.getCombinedLowerBoundVariable())); - auto LBCast = CGF.Builder.CreateIntCast( - CGF.Builder.CreateLoad(LB.getAddress()), CGF.SizeTy, /*isSigned=*/false); - CapturedVars.push_back(LBCast); - LValue UB = - CGF.EmitLValue(cast(Dir.getCombinedUpperBoundVariable())); - - auto UBCast = CGF.Builder.CreateIntCast( - CGF.Builder.CreateLoad(UB.getAddress()), CGF.SizeTy, /*isSigned=*/false); - CapturedVars.push_back(UBCast); -}; - -static void -emitInnerParallelForWhenCombined(CodeGenFunction &CGF, - const OMPLoopDirective &S, - CodeGenFunction::JumpDest LoopExit) { - auto &&CGInlinedWorksharingLoop = [&S](CodeGenFunction &CGF, - PrePostActionTy &) { - CGF.EmitOMPWorksharingLoop(S, S.getPrevEnsureUpperBound(), - emitDistributeParallelForInnerBounds, - emitDistributeParallelForDispatchBounds); - }; - - emitCommonOMPParallelDirective( - CGF, S, OMPD_for, CGInlinedWorksharingLoop, - emitDistributeParallelForDistributeInnerBoundParams); + EmitOMPOuterLoop(/* DynamicOrOrdered = */ false, /* IsMonotonic = */ false, + S, LoopScope, /* Ordered = */ false, LB, UB, ST, IL, Chunk); } void CodeGenFunction::EmitOMPDistributeParallelForDirective( const OMPDistributeParallelForDirective &S) { - auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) { - CGF.EmitOMPDistributeLoop(S, emitInnerParallelForWhenCombined, - S.getDistInc()); - }; OMPLexicalScope Scope(*this, S, /*AsInlined=*/true); - OMPCancelStackRAII CancelRegion(*this, OMPD_distribute_parallel_for, - /*HasCancel=*/false); - CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_distribute, CodeGen, - /*HasCancel=*/false); + CGM.getOpenMPRuntime().emitInlinedDirective( + *this, OMPD_distribute_parallel_for, + [&S](CodeGenFunction &CGF, PrePostActionTy &) { + OMPLoopScope PreInitScope(CGF, S); + OMPCancelStackRAII CancelRegion(CGF, OMPD_distribute_parallel_for, + /*HasCancel=*/false); + CGF.EmitStmt( + cast(S.getAssociatedStmt())->getCapturedStmt()); + }); } void CodeGenFunction::EmitOMPDistributeParallelForSimdDirective( @@ -2276,6 +2081,14 @@ void CodeGenFunction::EmitOMPTargetTeamsDistributeSimdDirective( }); } +/// \brief Emit a helper variable and return corresponding lvalue. +static LValue EmitOMPHelperVar(CodeGenFunction &CGF, + const DeclRefExpr *Helper) { + auto VDecl = cast(Helper->getDecl()); + CGF.EmitVarDecl(*VDecl); + return CGF.EmitLValue(Helper); +} + namespace { struct ScheduleKindModifiersTy { OpenMPScheduleClauseKind Kind; @@ -2288,10 +2101,7 @@ namespace { }; } // namespace -bool CodeGenFunction::EmitOMPWorksharingLoop( - const OMPLoopDirective &S, Expr *EUB, - const CodeGenLoopBoundsTy &CodeGenLoopBounds, - const CodeGenDispatchBoundsTy &CGDispatchBounds) { +bool CodeGenFunction::EmitOMPWorksharingLoop(const OMPLoopDirective &S) { // Emit the loop iteration variable. auto IVExpr = cast(S.getIterationVariable()); auto IVDecl = cast(IVExpr->getDecl()); @@ -2341,10 +2151,10 @@ bool CodeGenFunction::EmitOMPWorksharingLoop( emitAlignedClause(*this, S); EmitOMPLinearClauseInit(S); // Emit helper vars inits. - - std::pair Bounds = CodeGenLoopBounds(*this, S); - LValue LB = Bounds.first; - LValue UB = Bounds.second; + LValue LB = + EmitOMPHelperVar(*this, cast(S.getLowerBoundVariable())); + LValue UB = + EmitOMPHelperVar(*this, cast(S.getUpperBoundVariable())); LValue ST = EmitOMPHelperVar(*this, cast(S.getStrideVariable())); LValue IL = @@ -2430,11 +2240,9 @@ bool CodeGenFunction::EmitOMPWorksharingLoop( ScheduleKind.M2 == OMPC_SCHEDULE_MODIFIER_monotonic; // Emit the outer loop, which requests its work chunk [LB..UB] from // runtime and runs the inner loop to process it. - const OMPLoopArguments LoopArguments(LB.getAddress(), UB.getAddress(), - ST.getAddress(), IL.getAddress(), - Chunk, EUB); EmitOMPForOuterLoop(ScheduleKind, IsMonotonic, S, LoopScope, Ordered, - LoopArguments, CGDispatchBounds); + LB.getAddress(), UB.getAddress(), ST.getAddress(), + IL.getAddress(), Chunk); } if (isOpenMPSimdDirective(S.getDirectiveKind())) { EmitOMPSimdFinal(S, @@ -2472,42 +2280,12 @@ bool CodeGenFunction::EmitOMPWorksharingLoop( return HasLastprivateClause; } -/// The following two functions generate expressions for the loop lower -/// and upper bounds in case of static and dynamic (dispatch) schedule -/// of the associated 'for' or 'distribute' loop. -static std::pair -emitForLoopBounds(CodeGenFunction &CGF, const OMPExecutableDirective &S) { - const OMPLoopDirective &LS = cast(S); - LValue LB = - EmitOMPHelperVar(CGF, cast(LS.getLowerBoundVariable())); - LValue UB = - EmitOMPHelperVar(CGF, cast(LS.getUpperBoundVariable())); - return {LB, UB}; -} - -/// When dealing with dispatch schedules (e.g. dynamic, guided) we do not -/// consider the lower and upper bound expressions generated by the -/// worksharing loop support, but we use 0 and the iteration space size as -/// constants -static std::pair -emitDispatchForLoopBounds(CodeGenFunction &CGF, const OMPExecutableDirective &S, - Address LB, Address UB) { - const OMPLoopDirective &LS = cast(S); - const Expr *IVExpr = LS.getIterationVariable(); - const unsigned IVSize = CGF.getContext().getTypeSize(IVExpr->getType()); - llvm::Value *LBVal = CGF.Builder.getIntN(IVSize, 0); - llvm::Value *UBVal = CGF.EmitScalarExpr(LS.getLastIteration()); - return {LBVal, UBVal}; -} - void CodeGenFunction::EmitOMPForDirective(const OMPForDirective &S) { bool HasLastprivates = false; auto &&CodeGen = [&S, &HasLastprivates](CodeGenFunction &CGF, PrePostActionTy &) { OMPCancelStackRAII CancelRegion(CGF, OMPD_for, S.hasCancel()); - HasLastprivates = CGF.EmitOMPWorksharingLoop(S, S.getEnsureUpperBound(), - emitForLoopBounds, - emitDispatchForLoopBounds); + HasLastprivates = CGF.EmitOMPWorksharingLoop(S); }; { OMPLexicalScope Scope(*this, S, /*AsInlined=*/true); @@ -2525,9 +2303,7 @@ void CodeGenFunction::EmitOMPForSimdDirective(const OMPForSimdDirective &S) { bool HasLastprivates = false; auto &&CodeGen = [&S, &HasLastprivates](CodeGenFunction &CGF, PrePostActionTy &) { - HasLastprivates = CGF.EmitOMPWorksharingLoop(S, S.getEnsureUpperBound(), - emitForLoopBounds, - emitDispatchForLoopBounds); + HasLastprivates = CGF.EmitOMPWorksharingLoop(S); }; { OMPLexicalScope Scope(*this, S, /*AsInlined=*/true); @@ -2778,11 +2554,9 @@ void CodeGenFunction::EmitOMPParallelForDirective( // directives: 'parallel' with 'for' directive. auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) { OMPCancelStackRAII CancelRegion(CGF, OMPD_parallel_for, S.hasCancel()); - CGF.EmitOMPWorksharingLoop(S, S.getEnsureUpperBound(), emitForLoopBounds, - emitDispatchForLoopBounds); + CGF.EmitOMPWorksharingLoop(S); }; - emitCommonOMPParallelDirective(*this, S, OMPD_for, CodeGen, - emitEmptyBoundParameters); + emitCommonOMPParallelDirective(*this, S, OMPD_for, CodeGen); } void CodeGenFunction::EmitOMPParallelForSimdDirective( @@ -2790,11 +2564,9 @@ void CodeGenFunction::EmitOMPParallelForSimdDirective( // Emit directive as a combined directive that consists of two implicit // directives: 'parallel' with 'for' directive. auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) { - CGF.EmitOMPWorksharingLoop(S, S.getEnsureUpperBound(), emitForLoopBounds, - emitDispatchForLoopBounds); + CGF.EmitOMPWorksharingLoop(S); }; - emitCommonOMPParallelDirective(*this, S, OMPD_simd, CodeGen, - emitEmptyBoundParameters); + emitCommonOMPParallelDirective(*this, S, OMPD_simd, CodeGen); } void CodeGenFunction::EmitOMPParallelSectionsDirective( @@ -2804,8 +2576,7 @@ void CodeGenFunction::EmitOMPParallelSectionsDirective( auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) { CGF.EmitSections(S); }; - emitCommonOMPParallelDirective(*this, S, OMPD_sections, CodeGen, - emitEmptyBoundParameters); + emitCommonOMPParallelDirective(*this, S, OMPD_sections, CodeGen); } void CodeGenFunction::EmitOMPTaskBasedDirective(const OMPExecutableDirective &S, @@ -3023,9 +2794,7 @@ void CodeGenFunction::EmitOMPFlushDirective(const OMPFlushDirective &S) { }(), S.getLocStart()); } -void CodeGenFunction::EmitOMPDistributeLoop(const OMPLoopDirective &S, - const CodeGenLoopTy &CodeGenLoop, - Expr *IncExpr) { +void CodeGenFunction::EmitOMPDistributeLoop(const OMPDistributeDirective &S) { // Emit the loop iteration variable. auto IVExpr = cast(S.getIterationVariable()); auto IVDecl = cast(IVExpr->getDecl()); @@ -3066,17 +2835,10 @@ void CodeGenFunction::EmitOMPDistributeLoop(const OMPLoopDirective &S, // Emit 'then' code. { // Emit helper vars inits. - - LValue LB = EmitOMPHelperVar( - *this, cast( - (isOpenMPLoopBoundSharingDirective(S.getDirectiveKind()) - ? S.getCombinedLowerBoundVariable() - : S.getLowerBoundVariable()))); - LValue UB = EmitOMPHelperVar( - *this, cast( - (isOpenMPLoopBoundSharingDirective(S.getDirectiveKind()) - ? S.getCombinedUpperBoundVariable() - : S.getUpperBoundVariable()))); + LValue LB = + EmitOMPHelperVar(*this, cast(S.getLowerBoundVariable())); + LValue UB = + EmitOMPHelperVar(*this, cast(S.getUpperBoundVariable())); LValue ST = EmitOMPHelperVar(*this, cast(S.getStrideVariable())); LValue IL = @@ -3128,25 +2890,15 @@ void CodeGenFunction::EmitOMPDistributeLoop(const OMPLoopDirective &S, auto LoopExit = getJumpDestInCurrentScope(createBasicBlock("omp.loop.exit")); // UB = min(UB, GlobalUB); - EmitIgnoredExpr(isOpenMPLoopBoundSharingDirective(S.getDirectiveKind()) - ? S.getCombinedEnsureUpperBound() - : S.getEnsureUpperBound()); + EmitIgnoredExpr(S.getEnsureUpperBound()); // IV = LB; - EmitIgnoredExpr(isOpenMPLoopBoundSharingDirective(S.getDirectiveKind()) - ? S.getCombinedInit() - : S.getInit()); - - Expr *Cond = isOpenMPLoopBoundSharingDirective(S.getDirectiveKind()) - ? S.getCombinedCond() - : S.getCond(); - - // for distribute alone, codegen + EmitIgnoredExpr(S.getInit()); // while (idx <= UB) { BODY; ++idx; } - // when combined with 'for' (e.g. as in 'distribute parallel for') - // while (idx <= UB) { ; idx += ST; } - EmitOMPInnerLoop(S, LoopScope.requiresCleanups(), Cond, IncExpr, - [&S, LoopExit, &CodeGenLoop](CodeGenFunction &CGF) { - CodeGenLoop(CGF, S, LoopExit); + EmitOMPInnerLoop(S, LoopScope.requiresCleanups(), S.getCond(), + S.getInc(), + [&S, LoopExit](CodeGenFunction &CGF) { + CGF.EmitOMPLoopBody(S, LoopExit); + CGF.EmitStopPoint(&S); }, [](CodeGenFunction &) {}); EmitBlock(LoopExit.getBlock()); @@ -3155,11 +2907,9 @@ void CodeGenFunction::EmitOMPDistributeLoop(const OMPLoopDirective &S, } else { // Emit the outer loop, which requests its work chunk [LB..UB] from // runtime and runs the inner loop to process it. - const OMPLoopArguments LoopArguments = { - LB.getAddress(), UB.getAddress(), ST.getAddress(), IL.getAddress(), - Chunk}; - EmitOMPDistributeOuterLoop(ScheduleKind, S, LoopScope, LoopArguments, - CodeGenLoop); + EmitOMPDistributeOuterLoop(ScheduleKind, S, LoopScope, + LB.getAddress(), UB.getAddress(), ST.getAddress(), + IL.getAddress(), Chunk); } // Emit final copy of the lastprivate variables if IsLastIter != 0. @@ -3181,8 +2931,7 @@ void CodeGenFunction::EmitOMPDistributeLoop(const OMPLoopDirective &S, void CodeGenFunction::EmitOMPDistributeDirective( const OMPDistributeDirective &S) { auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) { - - CGF.EmitOMPDistributeLoop(S, emitOMPLoopBodyWithStopPoint, S.getInc()); + CGF.EmitOMPDistributeLoop(S); }; OMPLexicalScope Scope(*this, S, /*AsInlined=*/true); CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_distribute, CodeGen, @@ -4091,8 +3840,7 @@ static void emitTargetParallelRegion(CodeGenFunction &CGF, CGF.EmitStmt(CS->getCapturedStmt()); CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_parallel); }; - emitCommonOMPParallelDirective(CGF, S, OMPD_parallel, CodeGen, - emitEmptyBoundParameters); + emitCommonOMPParallelDirective(CGF, S, OMPD_parallel, CodeGen); emitPostUpdateForReductionClause( CGF, S, [](CodeGenFunction &) -> llvm::Value * { return nullptr; }); } diff --git a/lib/CodeGen/CodeGenFunction.h b/lib/CodeGen/CodeGenFunction.h index 1ded824ba5..fa72019eb0 100644 --- a/lib/CodeGen/CodeGenFunction.h +++ b/lib/CodeGen/CodeGenFunction.h @@ -175,25 +175,6 @@ public: // because of jumps. VarBypassDetector Bypasses; - // CodeGen lambda for loops and support for ordered clause - typedef llvm::function_ref - CodeGenLoopTy; - typedef llvm::function_ref - CodeGenOrderedTy; - - // Codegen lambda for loop bounds in worksharing loop constructs - typedef llvm::function_ref( - CodeGenFunction &, const OMPExecutableDirective &S)> - CodeGenLoopBoundsTy; - - // Codegen lambda for loop bounds in dispatch-based loop implementation - typedef llvm::function_ref( - CodeGenFunction &, const OMPExecutableDirective &S, Address LB, - Address UB)> - CodeGenDispatchBoundsTy; - /// \brief CGBuilder insert helper. This function is called after an /// instruction is created using Builder. void InsertHelper(llvm::Instruction *I, const llvm::Twine &Name, @@ -2775,6 +2756,7 @@ public: void EmitOMPTaskLoopDirective(const OMPTaskLoopDirective &S); void EmitOMPTaskLoopSimdDirective(const OMPTaskLoopSimdDirective &S); void EmitOMPDistributeDirective(const OMPDistributeDirective &S); + void EmitOMPDistributeLoop(const OMPDistributeDirective &S); void EmitOMPDistributeParallelForDirective( const OMPDistributeParallelForDirective &S); void EmitOMPDistributeParallelForSimdDirective( @@ -2831,78 +2813,32 @@ public: void EmitOMPPrivateLoopCounters(const OMPLoopDirective &S, OMPPrivateScope &LoopScope); - /// Helper for the OpenMP loop directives. - void EmitOMPLoopBody(const OMPLoopDirective &D, JumpDest LoopExit); - - /// \brief Emit code for the worksharing loop-based directive. - /// \return true, if this construct has any lastprivate clause, false - - /// otherwise. - bool EmitOMPWorksharingLoop(const OMPLoopDirective &S, Expr *EUB, - const CodeGenLoopBoundsTy &CodeGenLoopBounds, - const CodeGenDispatchBoundsTy &CGDispatchBounds); - private: /// Helpers for blocks llvm::Value *EmitBlockLiteral(const CGBlockInfo &Info); /// Helpers for the OpenMP loop directives. + void EmitOMPLoopBody(const OMPLoopDirective &D, JumpDest LoopExit); void EmitOMPSimdInit(const OMPLoopDirective &D, bool IsMonotonic = false); void EmitOMPSimdFinal( const OMPLoopDirective &D, const llvm::function_ref &CondGen); - - void EmitOMPDistributeLoop(const OMPLoopDirective &S, - const CodeGenLoopTy &CodeGenLoop, Expr *IncExpr); - - /// struct with the values to be passed to the OpenMP loop-related functions - struct OMPLoopArguments { - /// loop lower bound - Address LB = Address::invalid(); - /// loop upper bound - Address UB = Address::invalid(); - /// loop stride - Address ST = Address::invalid(); - /// isLastIteration argument for runtime functions - Address IL = Address::invalid(); - /// Chunk value generated by sema - llvm::Value *Chunk = nullptr; - /// EnsureUpperBound - Expr *EUB = nullptr; - /// IncrementExpression - Expr *IncExpr = nullptr; - /// Loop initialization - Expr *Init = nullptr; - /// Loop exit condition - Expr *Cond = nullptr; - /// Update of LB after a whole chunk has been executed - Expr *NextLB = nullptr; - /// Update of UB after a whole chunk has been executed - Expr *NextUB = nullptr; - OMPLoopArguments() = default; - OMPLoopArguments(Address LB, Address UB, Address ST, Address IL, - llvm::Value *Chunk = nullptr, Expr *EUB = nullptr, - Expr *IncExpr = nullptr, Expr *Init = nullptr, - Expr *Cond = nullptr, Expr *NextLB = nullptr, - Expr *NextUB = nullptr) - : LB(LB), UB(UB), ST(ST), IL(IL), Chunk(Chunk), EUB(EUB), - IncExpr(IncExpr), Init(Init), Cond(Cond), NextLB(NextLB), - NextUB(NextUB) {} - }; - void EmitOMPOuterLoop(bool DynamicOrOrdered, bool IsMonotonic, - const OMPLoopDirective &S, OMPPrivateScope &LoopScope, - const OMPLoopArguments &LoopArgs, - const CodeGenLoopTy &CodeGenLoop, - const CodeGenOrderedTy &CodeGenOrdered); + /// \brief Emit code for the worksharing loop-based directive. + /// \return true, if this construct has any lastprivate clause, false - + /// otherwise. + bool EmitOMPWorksharingLoop(const OMPLoopDirective &S); + void EmitOMPOuterLoop(bool IsMonotonic, bool DynamicOrOrdered, + const OMPLoopDirective &S, OMPPrivateScope &LoopScope, bool Ordered, + Address LB, Address UB, Address ST, Address IL, llvm::Value *Chunk); void EmitOMPForOuterLoop(const OpenMPScheduleTy &ScheduleKind, bool IsMonotonic, const OMPLoopDirective &S, - OMPPrivateScope &LoopScope, bool Ordered, - const OMPLoopArguments &LoopArgs, - const CodeGenDispatchBoundsTy &CGDispatchBounds); - void EmitOMPDistributeOuterLoop(OpenMPDistScheduleClauseKind ScheduleKind, - const OMPLoopDirective &S, - OMPPrivateScope &LoopScope, - const OMPLoopArguments &LoopArgs, - const CodeGenLoopTy &CodeGenLoopContent); + OMPPrivateScope &LoopScope, bool Ordered, Address LB, + Address UB, Address ST, Address IL, + llvm::Value *Chunk); + void EmitOMPDistributeOuterLoop( + OpenMPDistScheduleClauseKind ScheduleKind, + const OMPDistributeDirective &S, OMPPrivateScope &LoopScope, + Address LB, Address UB, Address ST, Address IL, llvm::Value *Chunk); /// \brief Emit code for sections directive. void EmitSections(const OMPExecutableDirective &S); diff --git a/test/OpenMP/distribute_parallel_for_codegen.cpp b/test/OpenMP/distribute_parallel_for_codegen.cpp deleted file mode 100644 index aeb00ec7db..0000000000 --- a/test/OpenMP/distribute_parallel_for_codegen.cpp +++ /dev/null @@ -1,2260 +0,0 @@ -// Test host code gen -// RUN: %clang_cc1 -DLAMBDA -verify -fopenmp -fopenmp-version=45 -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix LAMBDA --check-prefix LAMBDA-64 -// RUN: %clang_cc1 -DLAMBDA -fopenmp -fopenmp-version=45 -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s -// RUN: %clang_cc1 -DLAMBDA -fopenmp -fopenmp-version=45 -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix LAMBDA --check-prefix LAMBDA-64 -// RUN: %clang_cc1 -DLAMBDA -verify -fopenmp -fopenmp-version=45 -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix LAMBDA --check-prefix LAMBDA-32 -// RUN: %clang_cc1 -DLAMBDA -fopenmp -fopenmp-version=45 -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -o %t %s -// RUN: %clang_cc1 -DLAMBDA -fopenmp -fopenmp-version=45 -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix LAMBDA --check-prefix LAMBDA-32 - -// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-64 -// RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s -// RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-64 -// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-32 -// RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -o %t %s -// RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-32 -// expected-no-diagnostics -#ifndef HEADER -#define HEADER - - -template -T tmain() { - T *a, *b, *c; - int n = 10000; - int ch = 100; - - // no schedule clauses - #pragma omp target - #pragma omp teams - #pragma omp distribute parallel for - for (int i = 0; i < n; ++i) { - a[i] = b[i] + c[i]; - } - - // dist_schedule: static no chunk - #pragma omp target - #pragma omp teams - #pragma omp distribute parallel for dist_schedule(static) - for (int i = 0; i < n; ++i) { - a[i] = b[i] + c[i]; - } - - // dist_schedule: static chunk - #pragma omp target - #pragma omp teams - #pragma omp distribute parallel for dist_schedule(static, ch) - for (int i = 0; i < n; ++i) { - a[i] = b[i] + c[i]; - } - - // schedule: static no chunk - #pragma omp target - #pragma omp teams - #pragma omp distribute parallel for schedule(static) - for (int i = 0; i < n; ++i) { - a[i] = b[i] + c[i]; - } - - // schedule: static chunk - #pragma omp target - #pragma omp teams - #pragma omp distribute parallel for schedule(static, ch) - for (int i = 0; i < n; ++i) { - a[i] = b[i] + c[i]; - } - - // schedule: dynamic no chunk - #pragma omp target - #pragma omp teams - #pragma omp distribute parallel for schedule(dynamic) - for (int i = 0; i < n; ++i) { - a[i] = b[i] + c[i]; - } - - // schedule: dynamic chunk - #pragma omp target - #pragma omp teams - #pragma omp distribute parallel for schedule(dynamic, ch) - for (int i = 0; i < n; ++i) { - a[i] = b[i] + c[i]; - } - - return T(); -} - -int main() { - double *a, *b, *c; - int n = 10000; - int ch = 100; - -#ifdef LAMBDA - // LAMBDA-LABEL: @main - // LAMBDA: call{{.*}} void [[OUTER_LAMBDA:@.+]]( - [&]() { - // LAMBDA: define{{.*}} internal{{.*}} void [[OUTER_LAMBDA]]( - - // LAMBDA: call i{{[0-9]+}} @__tgt_target_teams( - // LAMBDA: call void [[OFFLOADING_FUN_1:@.+]]( - - // LAMBDA: call i{{[0-9]+}} @__tgt_target_teams( - // LAMBDA: call void [[OFFLOADING_FUN_2:@.+]]( - - // LAMBDA: call i{{[0-9]+}} @__tgt_target_teams( - // LAMBDA: call void [[OFFLOADING_FUN_3:@.+]]( - - // LAMBDA: call i{{[0-9]+}} @__tgt_target_teams( - // LAMBDA: call void [[OFFLOADING_FUN_4:@.+]]( - - // LAMBDA: call i{{[0-9]+}} @__tgt_target_teams( - // LAMBDA: call void [[OFFLOADING_FUN_5:@.+]]( - - // LAMBDA: call i{{[0-9]+}} @__tgt_target_teams( - // LAMBDA: call void [[OFFLOADING_FUN_6:@.+]]( - - // LAMBDA: call i{{[0-9]+}} @__tgt_target_teams( - // LAMBDA: call void [[OFFLOADING_FUN_7:@.+]]( - - // no schedule clauses - #pragma omp target - #pragma omp teams - // LAMBDA: define{{.+}} void [[OFFLOADING_FUN_1]]( - // LAMBDA: call {{.*}}void {{.+}} @__kmpc_fork_teams({{.+}}, i32 4, {{.+}}* [[OMP_OUTLINED_1:@.+]] to {{.+}}) - - #pragma omp distribute parallel for - for (int i = 0; i < n; ++i) { - a[i] = b[i] + c[i]; - // LAMBDA: define{{.+}} void [[OMP_OUTLINED_1]]( - // LAMBDA-DAG: [[OMP_IV:%.omp.iv]] = alloca - // LAMBDA-DAG: [[OMP_LB:%.omp.comb.lb]] = alloca - // LAMBDA-DAG: [[OMP_UB:%.omp.comb.ub]] = alloca - // LAMBDA-DAG: [[OMP_ST:%.omp.stride]] = alloca - - // LAMBDA: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, i32 92, - - // check EUB for distribute - // LAMBDA-DAG: [[OMP_UB_VAL_1:%.+]] = load{{.+}} [[OMP_UB]], - // LAMBDA: [[NUM_IT_1:%.+]] = load{{.+}}, - // LAMBDA-DAG: [[CMP_UB_NUM_IT:%.+]] = icmp sgt {{.+}} [[OMP_UB_VAL_1]], [[NUM_IT_1]] - // LAMBDA: br {{.+}} [[CMP_UB_NUM_IT]], label %[[EUB_TRUE:.+]], label %[[EUB_FALSE:.+]] - // LAMBDA-DAG: [[EUB_TRUE]]: - // LAMBDA: [[NUM_IT_2:%.+]] = load{{.+}}, - // LAMBDA: br label %[[EUB_END:.+]] - // LAMBDA-DAG: [[EUB_FALSE]]: - // LAMBDA: [[OMP_UB_VAL2:%.+]] = load{{.+}} [[OMP_UB]], - // LAMBDA: br label %[[EUB_END]] - // LAMBDA-DAG: [[EUB_END]]: - // LAMBDA-DAG: [[EUB_RES:%.+]] = phi{{.+}} [ [[NUM_IT_2]], %[[EUB_TRUE]] ], [ [[OMP_UB_VAL2]], %[[EUB_FALSE]] ] - // LAMBDA: store{{.+}} [[EUB_RES]], {{.+}}* [[OMP_UB]], - - // initialize omp.iv - // LAMBDA: [[OMP_LB_VAL_1:%.+]] = load{{.+}}, {{.+}}* [[OMP_LB]], - // LAMBDA: store {{.+}} [[OMP_LB_VAL_1]], {{.+}}* [[OMP_IV]], - // LAMBDA: br label %[[OMP_JUMP_BACK:.+]] - - // check exit condition - // LAMBDA: [[OMP_JUMP_BACK]]: - // LAMBDA-DAG: [[OMP_IV_VAL_1:%.+]] = load {{.+}} [[OMP_IV]], - // LAMBDA-DAG: [[OMP_UB_VAL_3:%.+]] = load {{.+}} [[OMP_UB]], - // LAMBDA: [[CMP_IV_UB:%.+]] = icmp sle {{.+}} [[OMP_IV_VAL_1]], [[OMP_UB_VAL_3]] - // LAMBDA: br {{.+}} [[CMP_IV_UB]], label %[[DIST_BODY:.+]], label %[[DIST_END:.+]] - - // check that PrevLB and PrevUB are passed to the 'for' - // LAMBDA: [[DIST_BODY]]: - // LAMBDA-DAG: [[OMP_PREV_LB:%.+]] = load {{.+}}, {{.+}} [[OMP_LB]], - // LAMBDA-64-DAG: [[OMP_PREV_LB_EXT:%.+]] = zext {{.+}} [[OMP_PREV_LB]] to - // LAMBDA-DAG: [[OMP_PREV_UB:%.+]] = load {{.+}}, {{.+}} [[OMP_UB]], - // LAMBDA-64-DAG: [[OMP_PREV_UB_EXT:%.+]] = zext {{.+}} [[OMP_PREV_UB]] to - // check that distlb and distub are properly passed to fork_call - // LAMBDA-32: call{{.+}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}}[[OMP_PARFOR_OUTLINED_1:@.+]] to {{.+}}, i{{[0-9]+}} [[OMP_PREV_LB]], i{{[0-9]+}} [[OMP_PREV_UB]], {{.+}}) - // LAMBDA-64: call{{.+}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}}[[OMP_PARFOR_OUTLINED_1:@.+]] to {{.+}}, i{{[0-9]+}} [[OMP_PREV_LB_EXT]], i{{[0-9]+}} [[OMP_PREV_UB_EXT]], {{.+}}) - // LAMBDA: br label %[[DIST_INC:.+]] - - // increment by stride (distInc - 'parallel for' executes the whole chunk) and latch - // LAMBDA: [[DIST_INC]]: - // LAMBDA-DAG: [[OMP_IV_VAL_2:%.+]] = load {{.+}}, {{.+}}* [[OMP_IV]], - // LAMBDA-DAG: [[OMP_ST_VAL_1:%.+]] = load {{.+}}, {{.+}}* [[OMP_ST]], - // LAMBDA: [[OMP_IV_INC:%.+]] = add{{.+}} [[OMP_IV_VAL_2]], [[OMP_ST_VAL_1]] - // LAMBDA: store{{.+}} [[OMP_IV_INC]], {{.+}}* [[OMP_IV]], - // LAMBDA: br label %[[OMP_JUMP_BACK]] - - // LAMBDA-DAG: call void @__kmpc_for_static_fini( - // LAMBDA: ret - - // implementation of 'parallel for' - // LAMBDA: define{{.+}} void [[OMP_PARFOR_OUTLINED_1]]({{.+}}, {{.+}}, i{{[0-9]+}} [[OMP_PREV_LB_IN:%.+]], i{{[0-9]+}} [[OMP_PREV_UB_IN:%.+]], {{.+}}, {{.+}}, {{.+}}, {{.+}}) - - // LAMBDA-DAG: [[OMP_PF_LB:%.omp.lb]] = alloca{{.+}}, - // LAMBDA-DAG: [[OMP_PF_UB:%.omp.ub]] = alloca{{.+}}, - // LAMBDA-DAG: [[OMP_PF_IV:%.omp.iv]] = alloca{{.+}}, - - // initialize lb and ub to PrevLB and PrevUB - // LAMBDA-DAG: store{{.+}} [[OMP_PREV_LB_IN]], {{.+}}* [[PREV_LB_ADDR:%.+]], - // LAMBDA-DAG: store{{.+}} [[OMP_PREV_UB_IN]], {{.+}}* [[PREV_UB_ADDR:%.+]], - // LAMBDA-DAG: [[OMP_PREV_LB_VAL:%.+]] = load{{.+}}, {{.+}}* [[PREV_LB_ADDR]], - // LAMBDA-64-DAG: [[OMP_PREV_LB_TRC:%.+]] = trunc{{.+}} [[OMP_PREV_LB_VAL]] to {{.+}} - // LAMBDA-DAG: [[OMP_PREV_UB_VAL:%.+]] = load{{.+}}, {{.+}}* [[PREV_UB_ADDR]], - // LAMBDA-64-DAG: [[OMP_PREV_UB_TRC:%.+]] = trunc{{.+}} [[OMP_PREV_UB_VAL]] to {{.+}} - // LAMBDA-64-DAG: store{{.+}} [[OMP_PREV_LB_TRC]], {{.+}}* [[OMP_PF_LB]], - // LAMBDA-32-DAG: store{{.+}} [[OMP_PREV_LB_VAL]], {{.+}}* [[OMP_PF_LB]], - // LAMBDA-64-DAG: store{{.+}} [[OMP_PREV_UB_TRC]], {{.+}}* [[OMP_PF_UB]], - // LAMBDA-32-DAG: store{{.+}} [[OMP_PREV_UB_VAL]], {{.+}}* [[OMP_PF_UB]], - // LAMBDA: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, {{.+}} 34, {{.+}}, {{.+}}* [[OMP_PF_LB]], {{.+}}* [[OMP_PF_UB]],{{.+}}) - - // PrevEUB is only used when 'for' has a chunked schedule, otherwise EUB is used - // In this case we use EUB - // LAMBDA-DAG: [[OMP_PF_UB_VAL_1:%.+]] = load{{.+}} [[OMP_PF_UB]], - // LAMBDA: [[PF_NUM_IT_1:%.+]] = load{{.+}}, - // LAMBDA-DAG: [[PF_CMP_UB_NUM_IT:%.+]] = icmp{{.+}} [[OMP_PF_UB_VAL_1]], [[PF_NUM_IT_1]] - // LAMBDA: br i1 [[PF_CMP_UB_NUM_IT]], label %[[PF_EUB_TRUE:.+]], label %[[PF_EUB_FALSE:.+]] - // LAMBDA: [[PF_EUB_TRUE]]: - // LAMBDA: [[PF_NUM_IT_2:%.+]] = load{{.+}}, - // LAMBDA: br label %[[PF_EUB_END:.+]] - // LAMBDA-DAG: [[PF_EUB_FALSE]]: - // LAMBDA: [[OMP_PF_UB_VAL2:%.+]] = load{{.+}} [[OMP_PF_UB]], - // LAMBDA: br label %[[PF_EUB_END]] - // LAMBDA-DAG: [[PF_EUB_END]]: - // LAMBDA-DAG: [[PF_EUB_RES:%.+]] = phi{{.+}} [ [[PF_NUM_IT_2]], %[[PF_EUB_TRUE]] ], [ [[OMP_PF_UB_VAL2]], %[[PF_EUB_FALSE]] ] - // LAMBDA: store{{.+}} [[PF_EUB_RES]],{{.+}} [[OMP_PF_UB]], - - // initialize omp.iv - // LAMBDA: [[OMP_PF_LB_VAL_1:%.+]] = load{{.+}}, {{.+}} [[OMP_PF_LB]], - // LAMBDA: store {{.+}} [[OMP_PF_LB_VAL_1]], {{.+}}* [[OMP_PF_IV]], - // LAMBDA: br label %[[OMP_PF_JUMP_BACK:.+]] - - // check exit condition - // LAMBDA: [[OMP_PF_JUMP_BACK]]: - // LAMBDA-DAG: [[OMP_PF_IV_VAL_1:%.+]] = load {{.+}} [[OMP_PF_IV]], - // LAMBDA-DAG: [[OMP_PF_UB_VAL_3:%.+]] = load {{.+}} [[OMP_PF_UB]], - // LAMBDA: [[PF_CMP_IV_UB:%.+]] = icmp sle {{.+}} [[OMP_PF_IV_VAL_1]], [[OMP_PF_UB_VAL_3]] - // LAMBDA: br {{.+}} [[PF_CMP_IV_UB]], label %[[PF_BODY:.+]], label %[[PF_END:.+]] - - // check that PrevLB and PrevUB are passed to the 'for' - // LAMBDA: [[PF_BODY]]: - // LAMBDA-DAG: {{.+}} = load{{.+}}, {{.+}}* [[OMP_PF_IV]], - // LAMBDA: br label {{.+}} - - // check stride 1 for 'for' in 'distribute parallel for' - // LAMBDA-DAG: [[OMP_PF_IV_VAL_2:%.+]] = load {{.+}}, {{.+}}* [[OMP_PF_IV]], - // LAMBDA: [[OMP_PF_IV_INC:%.+]] = add{{.+}} [[OMP_PF_IV_VAL_2]], 1 - // LAMBDA: store{{.+}} [[OMP_PF_IV_INC]], {{.+}}* [[OMP_PF_IV]], - // LAMBDA: br label %[[OMP_PF_JUMP_BACK]] - - // LAMBDA-DAG: call void @__kmpc_for_static_fini( - // LAMBDA: ret - - [&]() { - a[i] = b[i] + c[i]; - }(); - } - - // dist_schedule: static no chunk (same sa default - no dist_schedule) - #pragma omp target - #pragma omp teams - // LAMBDA: define{{.+}} void [[OFFLOADING_FUN_2]]( - // LAMBDA: call {{.*}}void {{.+}} @__kmpc_fork_teams({{.+}}, i32 4, {{.+}}* [[OMP_OUTLINED_2:@.+]] to {{.+}}) - - #pragma omp distribute parallel for dist_schedule(static) - for (int i = 0; i < n; ++i) { - a[i] = b[i] + c[i]; - // LAMBDA: define{{.+}} void [[OMP_OUTLINED_2]]( - // LAMBDA-DAG: [[OMP_IV:%.omp.iv]] = alloca - // LAMBDA-DAG: [[OMP_LB:%.omp.comb.lb]] = alloca - // LAMBDA-DAG: [[OMP_UB:%.omp.comb.ub]] = alloca - // LAMBDA-DAG: [[OMP_ST:%.omp.stride]] = alloca - - // LAMBDA: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, i32 92, - - // check EUB for distribute - // LAMBDA-DAG: [[OMP_UB_VAL_1:%.+]] = load{{.+}} [[OMP_UB]], - // LAMBDA: [[NUM_IT_1:%.+]] = load{{.+}}, - // LAMBDA-DAG: [[CMP_UB_NUM_IT:%.+]] = icmp sgt {{.+}} [[OMP_UB_VAL_1]], [[NUM_IT_1]] - // LAMBDA: br {{.+}} [[CMP_UB_NUM_IT]], label %[[EUB_TRUE:.+]], label %[[EUB_FALSE:.+]] - // LAMBDA-DAG: [[EUB_TRUE]]: - // LAMBDA: [[NUM_IT_2:%.+]] = load{{.+}}, - // LAMBDA: br label %[[EUB_END:.+]] - // LAMBDA-DAG: [[EUB_FALSE]]: - // LAMBDA: [[OMP_UB_VAL2:%.+]] = load{{.+}} [[OMP_UB]], - // LAMBDA: br label %[[EUB_END]] - // LAMBDA-DAG: [[EUB_END]]: - // LAMBDA-DAG: [[EUB_RES:%.+]] = phi{{.+}} [ [[NUM_IT_2]], %[[EUB_TRUE]] ], [ [[OMP_UB_VAL2]], %[[EUB_FALSE]] ] - // LAMBDA: store{{.+}} [[EUB_RES]], {{.+}}* [[OMP_UB]], - - // initialize omp.iv - // LAMBDA: [[OMP_LB_VAL_1:%.+]] = load{{.+}}, {{.+}}* [[OMP_LB]], - // LAMBDA: store {{.+}} [[OMP_LB_VAL_1]], {{.+}}* [[OMP_IV]], - // LAMBDA: br label %[[OMP_JUMP_BACK:.+]] - - // check exit condition - // LAMBDA: [[OMP_JUMP_BACK]]: - // LAMBDA-DAG: [[OMP_IV_VAL_1:%.+]] = load {{.+}} [[OMP_IV]], - // LAMBDA-DAG: [[OMP_UB_VAL_3:%.+]] = load {{.+}} [[OMP_UB]], - // LAMBDA: [[CMP_IV_UB:%.+]] = icmp sle {{.+}} [[OMP_IV_VAL_1]], [[OMP_UB_VAL_3]] - // LAMBDA: br {{.+}} [[CMP_IV_UB]], label %[[DIST_BODY:.+]], label %[[DIST_END:.+]] - - // check that PrevLB and PrevUB are passed to the 'for' - // LAMBDA: [[DIST_BODY]]: - // LAMBDA-DAG: [[OMP_PREV_LB:%.+]] = load {{.+}}, {{.+}} [[OMP_LB]], - // LAMBDA-64-DAG: [[OMP_PREV_LB_EXT:%.+]] = zext {{.+}} [[OMP_PREV_LB]] to - // LAMBDA-DAG: [[OMP_PREV_UB:%.+]] = load {{.+}}, {{.+}} [[OMP_UB]], - // LAMBDA-64-DAG: [[OMP_PREV_UB_EXT:%.+]] = zext {{.+}} [[OMP_PREV_UB]] to - // check that distlb and distub are properly passed to fork_call - // LAMBDA-64: call{{.+}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}}[[OMP_PARFOR_OUTLINED_2:@.+]] to {{.+}}, i{{[0-9]+}} [[OMP_PREV_LB_EXT]], i{{[0-9]+}} [[OMP_PREV_UB_EXT]], {{.+}}) - // LAMBDA-32: call{{.+}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}}[[OMP_PARFOR_OUTLINED_2:@.+]] to {{.+}}, i{{[0-9]+}} [[OMP_PREV_LB]], i{{[0-9]+}} [[OMP_PREV_UB]], {{.+}}) - // LAMBDA: br label %[[DIST_INC:.+]] - - // increment by stride (distInc - 'parallel for' executes the whole chunk) and latch - // LAMBDA: [[DIST_INC]]: - // LAMBDA-DAG: [[OMP_IV_VAL_2:%.+]] = load {{.+}}, {{.+}}* [[OMP_IV]], - // LAMBDA-DAG: [[OMP_ST_VAL_1:%.+]] = load {{.+}}, {{.+}}* [[OMP_ST]], - // LAMBDA: [[OMP_IV_INC:%.+]] = add{{.+}} [[OMP_IV_VAL_2]], [[OMP_ST_VAL_1]] - // LAMBDA: store{{.+}} [[OMP_IV_INC]], {{.+}}* [[OMP_IV]], - // LAMBDA: br label %[[OMP_JUMP_BACK]] - - // LAMBDA-DAG: call void @__kmpc_for_static_fini( - // LAMBDA: ret - - // implementation of 'parallel for' - // LAMBDA: define{{.+}} void [[OMP_PARFOR_OUTLINED_2]]({{.+}}, {{.+}}, i{{[0-9]+}} [[OMP_PREV_LB_IN:%.+]], i{{[0-9]+}} [[OMP_PREV_UB_IN:%.+]], {{.+}}, {{.+}}, {{.+}}, {{.+}}) - - // LAMBDA-DAG: [[OMP_PF_LB:%.omp.lb]] = alloca{{.+}}, - // LAMBDA-DAG: [[OMP_PF_UB:%.omp.ub]] = alloca{{.+}}, - // LAMBDA-DAG: [[OMP_PF_IV:%.omp.iv]] = alloca{{.+}}, - - // initialize lb and ub to PrevLB and PrevUB - // LAMBDA-DAG: store{{.+}} [[OMP_PREV_LB_IN]], {{.+}}* [[PREV_LB_ADDR:%.+]], - // LAMBDA-DAG: store{{.+}} [[OMP_PREV_UB_IN]], {{.+}}* [[PREV_UB_ADDR:%.+]], - // LAMBDA-DAG: [[OMP_PREV_LB_VAL:%.+]] = load{{.+}}, {{.+}}* [[PREV_LB_ADDR]], - // LAMBDA-64-DAG: [[OMP_PREV_LB_TRC:%.+]] = trunc{{.+}} [[OMP_PREV_LB_VAL]] to {{.+}} - // LAMBDA-DAG: [[OMP_PREV_UB_VAL:%.+]] = load{{.+}}, {{.+}}* [[PREV_UB_ADDR]], - // LAMBDA-64-DAG: [[OMP_PREV_UB_TRC:%.+]] = trunc{{.+}} [[OMP_PREV_UB_VAL]] to {{.+}} - // LAMBDA-64-DAG: store{{.+}} [[OMP_PREV_LB_TRC]], {{.+}}* [[OMP_PF_LB]], - // LAMBDA-32-DAG: store{{.+}} [[OMP_PREV_LB_VAL]], {{.+}}* [[OMP_PF_LB]], - // LAMBDA-64-DAG: store{{.+}} [[OMP_PREV_UB_TRC]], {{.+}}* [[OMP_PF_UB]], - // LAMBDA-32-DAG: store{{.+}} [[OMP_PREV_UB_VAL]], {{.+}}* [[OMP_PF_UB]], - // LAMBDA: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, {{.+}} 34, {{.+}}, {{.+}}* [[OMP_PF_LB]], {{.+}}* [[OMP_PF_UB]],{{.+}}) - - // PrevEUB is only used when 'for' has a chunked schedule, otherwise EUB is used - // In this case we use EUB - // LAMBDA-DAG: [[OMP_PF_UB_VAL_1:%.+]] = load{{.+}} [[OMP_PF_UB]], - // LAMBDA: [[PF_NUM_IT_1:%.+]] = load{{.+}}, - // LAMBDA-DAG: [[PF_CMP_UB_NUM_IT:%.+]] = icmp{{.+}} [[OMP_PF_UB_VAL_1]], [[PF_NUM_IT_1]] - // LAMBDA: br i1 [[PF_CMP_UB_NUM_IT]], label %[[PF_EUB_TRUE:.+]], label %[[PF_EUB_FALSE:.+]] - // LAMBDA: [[PF_EUB_TRUE]]: - // LAMBDA: [[PF_NUM_IT_2:%.+]] = load{{.+}}, - // LAMBDA: br label %[[PF_EUB_END:.+]] - // LAMBDA-DAG: [[PF_EUB_FALSE]]: - // LAMBDA: [[OMP_PF_UB_VAL2:%.+]] = load{{.+}} [[OMP_PF_UB]], - // LAMBDA: br label %[[PF_EUB_END]] - // LAMBDA-DAG: [[PF_EUB_END]]: - // LAMBDA-DAG: [[PF_EUB_RES:%.+]] = phi{{.+}} [ [[PF_NUM_IT_2]], %[[PF_EUB_TRUE]] ], [ [[OMP_PF_UB_VAL2]], %[[PF_EUB_FALSE]] ] - // LAMBDA: store{{.+}} [[PF_EUB_RES]],{{.+}} [[OMP_PF_UB]], - - // initialize omp.iv - // LAMBDA: [[OMP_PF_LB_VAL_1:%.+]] = load{{.+}}, {{.+}} [[OMP_PF_LB]], - // LAMBDA: store {{.+}} [[OMP_PF_LB_VAL_1]], {{.+}}* [[OMP_PF_IV]], - // LAMBDA: br label %[[OMP_PF_JUMP_BACK:.+]] - - // check exit condition - // LAMBDA: [[OMP_PF_JUMP_BACK]]: - // LAMBDA-DAG: [[OMP_PF_IV_VAL_1:%.+]] = load {{.+}} [[OMP_PF_IV]], - // LAMBDA-DAG: [[OMP_PF_UB_VAL_3:%.+]] = load {{.+}} [[OMP_PF_UB]], - // LAMBDA: [[PF_CMP_IV_UB:%.+]] = icmp sle {{.+}} [[OMP_PF_IV_VAL_1]], [[OMP_PF_UB_VAL_3]] - // LAMBDA: br {{.+}} [[PF_CMP_IV_UB]], label %[[PF_BODY:.+]], label %[[PF_END:.+]] - - // check that PrevLB and PrevUB are passed to the 'for' - // LAMBDA: [[PF_BODY]]: - // LAMBDA-DAG: {{.+}} = load{{.+}}, {{.+}}* [[OMP_PF_IV]], - // LAMBDA: br label {{.+}} - - // check stride 1 for 'for' in 'distribute parallel for' - // LAMBDA-DAG: [[OMP_PF_IV_VAL_2:%.+]] = load {{.+}}, {{.+}}* [[OMP_PF_IV]], - // LAMBDA: [[OMP_PF_IV_INC:%.+]] = add{{.+}} [[OMP_PF_IV_VAL_2]], 1 - // LAMBDA: store{{.+}} [[OMP_PF_IV_INC]], {{.+}}* [[OMP_PF_IV]], - // LAMBDA: br label %[[OMP_PF_JUMP_BACK]] - - // LAMBDA-DAG: call void @__kmpc_for_static_fini( - // LAMBDA: ret - [&]() { - a[i] = b[i] + c[i]; - }(); - } - - // dist_schedule: static chunk - #pragma omp target - #pragma omp teams - // LAMBDA: define{{.+}} void [[OFFLOADING_FUN_3]]( - // LAMBDA: call {{.*}}void {{.+}} @__kmpc_fork_teams({{.+}}, i32 5, {{.+}}* [[OMP_OUTLINED_3:@.+]] to {{.+}}) - - #pragma omp distribute parallel for dist_schedule(static, ch) - for (int i = 0; i < n; ++i) { - a[i] = b[i] + c[i]; - // LAMBDA: define{{.+}} void [[OMP_OUTLINED_3]]( - // LAMBDA-DAG: [[OMP_IV:%.omp.iv]] = alloca - // LAMBDA-DAG: [[OMP_LB:%.omp.comb.lb]] = alloca - // LAMBDA-DAG: [[OMP_UB:%.omp.comb.ub]] = alloca - // LAMBDA-DAG: [[OMP_ST:%.omp.stride]] = alloca - - // unlike the previous tests, in this one we have a outer and inner loop for 'distribute' - // LAMBDA: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, i32 91, - // LAMBDA: br label %[[DIST_OUTER_LOOP_HEADER:.+]] - - // LAMBDA: [[DIST_OUTER_LOOP_HEADER]]: - // check EUB for distribute - // LAMBDA-DAG: [[OMP_UB_VAL_1:%.+]] = load{{.+}} [[OMP_UB]], - // LAMBDA: [[NUM_IT_1:%.+]] = load{{.+}}, - // LAMBDA-DAG: [[CMP_UB_NUM_IT:%.+]] = icmp sgt {{.+}} [[OMP_UB_VAL_1]], [[NUM_IT_1]] - // LAMBDA: br {{.+}} [[CMP_UB_NUM_IT]], label %[[EUB_TRUE:.+]], label %[[EUB_FALSE:.+]] - // LAMBDA-DAG: [[EUB_TRUE]]: - // LAMBDA: [[NUM_IT_2:%.+]] = load{{.+}}, - // LAMBDA: br label %[[EUB_END:.+]] - // LAMBDA-DAG: [[EUB_FALSE]]: - // LAMBDA: [[OMP_UB_VAL2:%.+]] = load{{.+}} [[OMP_UB]], - // LAMBDA: br label %[[EUB_END]] - // LAMBDA-DAG: [[EUB_END]]: - // LAMBDA-DAG: [[EUB_RES:%.+]] = phi{{.+}} [ [[NUM_IT_2]], %[[EUB_TRUE]] ], [ [[OMP_UB_VAL2]], %[[EUB_FALSE]] ] - // LAMBDA: store{{.+}} [[EUB_RES]], {{.+}}* [[OMP_UB]], - - // initialize omp.iv - // LAMBDA: [[OMP_LB_VAL_1:%.+]] = load{{.+}}, {{.+}}* [[OMP_LB]], - // LAMBDA: store {{.+}} [[OMP_LB_VAL_1]], {{.+}}* [[OMP_IV]], - - // check exit condition - // LAMBDA-DAG: [[OMP_IV_VAL_1:%.+]] = load {{.+}} [[OMP_IV]], - // LAMBDA-DAG: [[OMP_UB_VAL_3:%.+]] = load {{.+}} [[OMP_UB]], - // LAMBDA: [[CMP_IV_UB:%.+]] = icmp sle {{.+}} [[OMP_IV_VAL_1]], [[OMP_UB_VAL_3]] - // LAMBDA: br {{.+}} [[CMP_IV_UB]], label %[[DIST_OUTER_LOOP_BODY:.+]], label %[[DIST_OUTER_LOOP_END:.+]] - - // LAMBDA: [[DIST_OUTER_LOOP_BODY]]: - // LAMBDA: br label %[[DIST_INNER_LOOP_HEADER:.+]] - - // LAMBDA: [[DIST_INNER_LOOP_HEADER]]: - // LAMBDA-DAG: [[OMP_IV_VAL_2:%.+]] = load {{.+}} [[OMP_IV]], - // LAMBDA-DAG: [[OMP_UB_VAL_4:%.+]] = load {{.+}} [[OMP_UB]], - // LAMBDA: [[CMP_IV_UB_2:%.+]] = icmp sle {{.+}} [[OMP_IV_VAL_2]], [[OMP_UB_VAL_4]] - // LAMBDA: br{{.+}} [[CMP_IV_UB_2]], label %[[DIST_INNER_LOOP_BODY:.+]], label %[[DIST_INNER_LOOP_END:.+]] - - // check that PrevLB and PrevUB are passed to the 'for' - // LAMBDA: [[DIST_INNER_LOOP_BODY]]: - // LAMBDA-DAG: [[OMP_PREV_LB:%.+]] = load {{.+}}, {{.+}} [[OMP_LB]], - // LAMBDA-64-DAG: [[OMP_PREV_LB_EXT:%.+]] = zext {{.+}} [[OMP_PREV_LB]] to {{.+}} - // LAMBDA-DAG: [[OMP_PREV_UB:%.+]] = load {{.+}}, {{.+}} [[OMP_UB]], - // LAMBDA-64-DAG: [[OMP_PREV_UB_EXT:%.+]] = zext {{.+}} [[OMP_PREV_UB]] to {{.+}} - // check that distlb and distub are properly passed to fork_call - // LAMBDA-64: call{{.+}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}}[[OMP_PARFOR_OUTLINED_3:@.+]] to {{.+}}, i{{[0-9]+}} [[OMP_PREV_LB_EXT]], i{{[0-9]+}} [[OMP_PREV_UB_EXT]], {{.+}}) - // LAMBDA-32: call{{.+}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}}[[OMP_PARFOR_OUTLINED_3:@.+]] to {{.+}}, i{{[0-9]+}} [[OMP_PREV_LB]], i{{[0-9]+}} [[OMP_PREV_UB]], {{.+}}) - // LAMBDA: br label %[[DIST_INNER_LOOP_INC:.+]] - - // check DistInc - // LAMBDA: [[DIST_INNER_LOOP_INC]]: - // LAMBDA-DAG: [[OMP_IV_VAL_3:%.+]] = load {{.+}}, {{.+}}* [[OMP_IV]], - // LAMBDA-DAG: [[OMP_ST_VAL_1:%.+]] = load {{.+}}, {{.+}}* [[OMP_ST]], - // LAMBDA: [[OMP_IV_INC:%.+]] = add{{.+}} [[OMP_IV_VAL_3]], [[OMP_ST_VAL_1]] - // LAMBDA: store{{.+}} [[OMP_IV_INC]], {{.+}}* [[OMP_IV]], - // LAMBDA: br label %[[DIST_INNER_LOOP_HEADER]] - - // LAMBDA: [[DIST_INNER_LOOP_END]]: - // LAMBDA: br label %[[DIST_OUTER_LOOP_INC:.+]] - - // LAMBDA: [[DIST_OUTER_LOOP_INC]]: - // check NextLB and NextUB - // LAMBDA-DAG: [[OMP_LB_VAL_2:%.+]] = load{{.+}}, {{.+}} [[OMP_LB]], - // LAMBDA-DAG: [[OMP_ST_VAL_2:%.+]] = load{{.+}}, {{.+}} [[OMP_ST]], - // LAMBDA-DAG: [[OMP_LB_NEXT:%.+]] = add{{.+}} [[OMP_LB_VAL_2]], [[OMP_ST_VAL_2]] - // LAMBDA: store{{.+}} [[OMP_LB_NEXT]], {{.+}}* [[OMP_LB]], - // LAMBDA-DAG: [[OMP_UB_VAL_5:%.+]] = load{{.+}}, {{.+}} [[OMP_UB]], - // LAMBDA-DAG: [[OMP_ST_VAL_3:%.+]] = load{{.+}}, {{.+}} [[OMP_ST]], - // LAMBDA-DAG: [[OMP_UB_NEXT:%.+]] = add{{.+}} [[OMP_UB_VAL_5]], [[OMP_ST_VAL_3]] - // LAMBDA: store{{.+}} [[OMP_UB_NEXT]], {{.+}}* [[OMP_UB]], - // LAMBDA: br label %[[DIST_OUTER_LOOP_HEADER]] - - // outer loop exit - // LAMBDA: [[DIST_OUTER_LOOP_END]]: - // LAMBDA-DAG: call void @__kmpc_for_static_fini( - // LAMBDA: ret - - // skip implementation of 'parallel for': using default scheduling and was tested above - [&]() { - a[i] = b[i] + c[i]; - }(); - } - - // schedule: static no chunk - #pragma omp target - #pragma omp teams - // LAMBDA: define{{.+}} void [[OFFLOADING_FUN_4]]( - // LAMBDA: call {{.*}}void {{.+}} @__kmpc_fork_teams({{.+}}, i32 4, {{.+}}* [[OMP_OUTLINED_4:@.+]] to {{.+}}) - - #pragma omp distribute parallel for schedule(static) - for (int i = 0; i < n; ++i) { - a[i] = b[i] + c[i]; - // LAMBDA: define{{.+}} void [[OMP_OUTLINED_4]]( - // LAMBDA-DAG: [[OMP_IV:%.omp.iv]] = alloca - // LAMBDA-DAG: [[OMP_LB:%.omp.comb.lb]] = alloca - // LAMBDA-DAG: [[OMP_UB:%.omp.comb.ub]] = alloca - // LAMBDA-DAG: [[OMP_ST:%.omp.stride]] = alloca - - // LAMBDA: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, i32 92, - // LAMBDA: call{{.+}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}}[[OMP_PARFOR_OUTLINED_4:@.+]] to {{.+}}, - // skip rest of implementation of 'distribute' as it is tested above for default dist_schedule case - // LAMBDA: ret - - // 'parallel for' implementation is the same as the case without schedule clase (static no chunk is the default) - // LAMBDA: define{{.+}} void [[OMP_PARFOR_OUTLINED_4]]({{.+}}, {{.+}}, i{{[0-9]+}} [[OMP_PREV_LB_IN:%.+]], i{{[0-9]+}} [[OMP_PREV_UB_IN:%.+]], {{.+}}, {{.+}}, {{.+}}, {{.+}}) - - // LAMBDA-DAG: [[OMP_PF_LB:%.omp.lb]] = alloca{{.+}}, - // LAMBDA-DAG: [[OMP_PF_UB:%.omp.ub]] = alloca{{.+}}, - // LAMBDA-DAG: [[OMP_PF_IV:%.omp.iv]] = alloca{{.+}}, - - // initialize lb and ub to PrevLB and PrevUB - // LAMBDA-DAG: store{{.+}} [[OMP_PREV_LB_IN]], {{.+}}* [[PREV_LB_ADDR:%.+]], - // LAMBDA-DAG: store{{.+}} [[OMP_PREV_UB_IN]], {{.+}}* [[PREV_UB_ADDR:%.+]], - // LAMBDA-DAG: [[OMP_PREV_LB_VAL:%.+]] = load{{.+}}, {{.+}}* [[PREV_LB_ADDR]], - // LAMBDA-64-DAG: [[OMP_PREV_LB_TRC:%.+]] = trunc{{.+}} [[OMP_PREV_LB_VAL]] to {{.+}} - // LAMBDA-DAG: [[OMP_PREV_UB_VAL:%.+]] = load{{.+}}, {{.+}}* [[PREV_UB_ADDR]], - // LAMBDA-64-DAG: [[OMP_PREV_UB_TRC:%.+]] = trunc{{.+}} [[OMP_PREV_UB_VAL]] to {{.+}} - // LAMBDA-64-DAG: store{{.+}} [[OMP_PREV_LB_TRC]], {{.+}}* [[OMP_PF_LB]], - // LAMBDA-32-DAG: store{{.+}} [[OMP_PREV_LB_VAL]], {{.+}}* [[OMP_PF_LB]], - // LAMBDA-64-DAG: store{{.+}} [[OMP_PREV_UB_TRC]], {{.+}}* [[OMP_PF_UB]], - // LAMBDA-32-DAG: store{{.+}} [[OMP_PREV_UB_VAL]], {{.+}}* [[OMP_PF_UB]], - // LAMBDA: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, {{.+}} 34, {{.+}}, {{.+}}* [[OMP_PF_LB]], {{.+}}* [[OMP_PF_UB]],{{.+}}) - - // PrevEUB is only used when 'for' has a chunked schedule, otherwise EUB is used - // In this case we use EUB - // LAMBDA-DAG: [[OMP_PF_UB_VAL_1:%.+]] = load{{.+}} [[OMP_PF_UB]], - // LAMBDA: [[PF_NUM_IT_1:%.+]] = load{{.+}}, - // LAMBDA-DAG: [[PF_CMP_UB_NUM_IT:%.+]] = icmp{{.+}} [[OMP_PF_UB_VAL_1]], [[PF_NUM_IT_1]] - // LAMBDA: br i1 [[PF_CMP_UB_NUM_IT]], label %[[PF_EUB_TRUE:.+]], label %[[PF_EUB_FALSE:.+]] - // LAMBDA: [[PF_EUB_TRUE]]: - // LAMBDA: [[PF_NUM_IT_2:%.+]] = load{{.+}}, - // LAMBDA: br label %[[PF_EUB_END:.+]] - // LAMBDA-DAG: [[PF_EUB_FALSE]]: - // LAMBDA: [[OMP_PF_UB_VAL2:%.+]] = load{{.+}} [[OMP_PF_UB]], - // LAMBDA: br label %[[PF_EUB_END]] - // LAMBDA-DAG: [[PF_EUB_END]]: - // LAMBDA-DAG: [[PF_EUB_RES:%.+]] = phi{{.+}} [ [[PF_NUM_IT_2]], %[[PF_EUB_TRUE]] ], [ [[OMP_PF_UB_VAL2]], %[[PF_EUB_FALSE]] ] - // LAMBDA: store{{.+}} [[PF_EUB_RES]],{{.+}} [[OMP_PF_UB]], - - // initialize omp.iv - // LAMBDA: [[OMP_PF_LB_VAL_1:%.+]] = load{{.+}}, {{.+}} [[OMP_PF_LB]], - // LAMBDA: store {{.+}} [[OMP_PF_LB_VAL_1]], {{.+}}* [[OMP_PF_IV]], - // LAMBDA: br label %[[OMP_PF_JUMP_BACK:.+]] - - // check exit condition - // LAMBDA: [[OMP_PF_JUMP_BACK]]: - // LAMBDA-DAG: [[OMP_PF_IV_VAL_1:%.+]] = load {{.+}} [[OMP_PF_IV]], - // LAMBDA-DAG: [[OMP_PF_UB_VAL_3:%.+]] = load {{.+}} [[OMP_PF_UB]], - // LAMBDA: [[PF_CMP_IV_UB:%.+]] = icmp sle {{.+}} [[OMP_PF_IV_VAL_1]], [[OMP_PF_UB_VAL_3]] - // LAMBDA: br {{.+}} [[PF_CMP_IV_UB]], label %[[PF_BODY:.+]], label %[[PF_END:.+]] - - // check that PrevLB and PrevUB are passed to the 'for' - // LAMBDA: [[PF_BODY]]: - // LAMBDA-DAG: {{.+}} = load{{.+}}, {{.+}}* [[OMP_PF_IV]], - // LAMBDA: br label {{.+}} - - // check stride 1 for 'for' in 'distribute parallel for' - // LAMBDA-DAG: [[OMP_PF_IV_VAL_2:%.+]] = load {{.+}}, {{.+}}* [[OMP_PF_IV]], - // LAMBDA: [[OMP_PF_IV_INC:%.+]] = add{{.+}} [[OMP_PF_IV_VAL_2]], 1 - // LAMBDA: store{{.+}} [[OMP_PF_IV_INC]], {{.+}}* [[OMP_PF_IV]], - // LAMBDA: br label %[[OMP_PF_JUMP_BACK]] - - // LAMBDA-DAG: call void @__kmpc_for_static_fini( - // LAMBDA: ret - - [&]() { - a[i] = b[i] + c[i]; - }(); - } - - // schedule: static chunk - #pragma omp target - #pragma omp teams - // LAMBDA: define{{.+}} void [[OFFLOADING_FUN_5]]( - // LAMBDA: call {{.*}}void {{.+}} @__kmpc_fork_teams({{.+}}, i32 5, {{.+}}* [[OMP_OUTLINED_5:@.+]] to {{.+}}) - - #pragma omp distribute parallel for schedule(static, ch) - for (int i = 0; i < n; ++i) { - a[i] = b[i] + c[i]; - // LAMBDA: define{{.+}} void [[OMP_OUTLINED_5]]( - // LAMBDA: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, i32 92, - // LAMBDA: call{{.+}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}}[[OMP_PARFOR_OUTLINED_5:@.+]] to {{.+}}, - // skip rest of implementation of 'distribute' as it is tested above for default dist_schedule case - // LAMBDA: ret - - // 'parallel for' implementation using outer and inner loops and PrevEUB - // LAMBDA: define{{.+}} void [[OMP_PARFOR_OUTLINED_5]]({{.+}}, {{.+}}, i{{[0-9]+}} [[OMP_PREV_LB_IN:%.+]], i{{[0-9]+}} [[OMP_PREV_UB_IN:%.+]], {{.+}}, {{.+}}, {{.+}}, {{.+}}, {{.+}}) - // LAMBDA-DAG: [[OMP_PF_LB:%.omp.lb]] = alloca{{.+}}, - // LAMBDA-DAG: [[OMP_PF_UB:%.omp.ub]] = alloca{{.+}}, - // LAMBDA-DAG: [[OMP_PF_IV:%.omp.iv]] = alloca{{.+}}, - // LAMBDA-DAG: [[OMP_PF_ST:%.omp.stride]] = alloca{{.+}}, - - // initialize lb and ub to PrevLB and PrevUB - // LAMBDA-DAG: store{{.+}} [[OMP_PREV_LB_IN]], {{.+}}* [[PREV_LB_ADDR:%.+]], - // LAMBDA-DAG: store{{.+}} [[OMP_PREV_UB_IN]], {{.+}}* [[PREV_UB_ADDR:%.+]], - // LAMBDA-DAG: [[OMP_PREV_LB_VAL:%.+]] = load{{.+}}, {{.+}}* [[PREV_LB_ADDR]], - // LAMBDA-64-DAG: [[OMP_PREV_LB_TRC:%.+]] = trunc{{.+}} [[OMP_PREV_LB_VAL]] to {{.+}} - // LAMBDA-DAG: [[OMP_PREV_UB_VAL:%.+]] = load{{.+}}, {{.+}}* [[PREV_UB_ADDR]], - // LAMBDA-64-DAG: [[OMP_PREV_UB_TRC:%.+]] = trunc{{.+}} [[OMP_PREV_UB_VAL]] to {{.+}} - // LAMBDA-64-DAG: store{{.+}} [[OMP_PREV_LB_TRC]], {{.+}}* [[OMP_PF_LB]], - // LAMBDA-32-DAG: store{{.+}} [[OMP_PREV_LB_VAL]], {{.+}}* [[OMP_PF_LB]], - // LAMBDA-64-DAG: store{{.+}} [[OMP_PREV_UB_TRC]], {{.+}}* [[OMP_PF_UB]], - // LAMBDA-32-DAG: store{{.+}} [[OMP_PREV_UB_VAL]], {{.+}}* [[OMP_PF_UB]], - // LAMBDA: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, {{.+}} 33, {{.+}}, {{.+}}* [[OMP_PF_LB]], {{.+}}* [[OMP_PF_UB]],{{.+}}) - // LAMBDA: br label %[[OMP_PF_OUTER_LOOP_HEADER:.+]] - - // check PrevEUB (using PrevUB instead of NumIt as upper bound) - // LAMBDA: [[OMP_PF_OUTER_LOOP_HEADER]]: - // LAMBDA-DAG: [[OMP_PF_UB_VAL_1:%.+]] = load{{.+}} [[OMP_PF_UB]], - // LAMBDA-64-DAG: [[OMP_PF_UB_VAL_CONV:%.+]] = sext{{.+}} [[OMP_PF_UB_VAL_1]] to - // LAMBDA: [[PF_PREV_UB_VAL_1:%.+]] = load{{.+}}, {{.+}}* [[PREV_UB_ADDR]], - // LAMBDA-64-DAG: [[PF_CMP_UB_NUM_IT:%.+]] = icmp{{.+}} [[OMP_PF_UB_VAL_CONV]], [[PF_PREV_UB_VAL_1]] - // LAMBDA-32-DAG: [[PF_CMP_UB_NUM_IT:%.+]] = icmp{{.+}} [[OMP_PF_UB_VAL_1]], [[PF_PREV_UB_VAL_1]] - // LAMBDA: br i1 [[PF_CMP_UB_NUM_IT]], label %[[PF_EUB_TRUE:.+]], label %[[PF_EUB_FALSE:.+]] - // LAMBDA: [[PF_EUB_TRUE]]: - // LAMBDA: [[PF_PREV_UB_VAL_2:%.+]] = load{{.+}}, {{.+}}* [[PREV_UB_ADDR]], - // LAMBDA: br label %[[PF_EUB_END:.+]] - // LAMBDA-DAG: [[PF_EUB_FALSE]]: - // LAMBDA: [[OMP_PF_UB_VAL_2:%.+]] = load{{.+}} [[OMP_PF_UB]], - // LAMBDA-64: [[OMP_PF_UB_VAL_2_CONV:%.+]] = sext{{.+}} [[OMP_PF_UB_VAL_2]] to - // LAMBDA: br label %[[PF_EUB_END]] - // LAMBDA-DAG: [[PF_EUB_END]]: - // LAMBDA-64-DAG: [[PF_EUB_RES:%.+]] = phi{{.+}} [ [[PF_PREV_UB_VAL_2]], %[[PF_EUB_TRUE]] ], [ [[OMP_PF_UB_VAL_2_CONV]], %[[PF_EUB_FALSE]] ] - // LAMBDA-32-DAG: [[PF_EUB_RES:%.+]] = phi{{.+}} [ [[PF_PREV_UB_VAL_2]], %[[PF_EUB_TRUE]] ], [ [[OMP_PF_UB_VAL_2]], %[[PF_EUB_FALSE]] ] - // LAMBDA-64-DAG: [[PF_EUB_RES_CONV:%.+]] = trunc{{.+}} [[PF_EUB_RES]] to - // LAMBDA-64: store{{.+}} [[PF_EUB_RES_CONV]],{{.+}} [[OMP_PF_UB]], - // LAMBDA-32: store{{.+}} [[PF_EUB_RES]], {{.+}} [[OMP_PF_UB]], - - // initialize omp.iv (IV = LB) - // LAMBDA: [[OMP_PF_LB_VAL_1:%.+]] = load{{.+}}, {{.+}} [[OMP_PF_LB]], - // LAMBDA: store {{.+}} [[OMP_PF_LB_VAL_1]], {{.+}}* [[OMP_PF_IV]], - - // outer loop: while (IV < UB) { - // LAMBDA-DAG: [[OMP_PF_IV_VAL_1:%.+]] = load{{.+}}, {{.+}}* [[OMP_PF_IV]], - // LAMBDA-DAG: [[OMP_PF_UB_VAL_3:%.+]] = load{{.+}}, {{.+}}* [[OMP_PF_UB]], - // LAMBDA: [[PF_CMP_IV_UB_1:%.+]] = icmp{{.+}} [[OMP_PF_IV_VAL_1]], [[OMP_PF_UB_VAL_3]] - // LAMBDA: br{{.+}} [[PF_CMP_IV_UB_1]], label %[[OMP_PF_OUTER_LOOP_BODY:.+]], label %[[OMP_PF_OUTER_LOOP_END:.+]] - - // LAMBDA: [[OMP_PF_OUTER_LOOP_BODY]]: - // LAMBDA: br label %[[OMP_PF_INNER_FOR_HEADER:.+]] - - // LAMBDA: [[OMP_PF_INNER_FOR_HEADER]]: - // LAMBDA-DAG: [[OMP_PF_IV_VAL_2:%.+]] = load{{.+}}, {{.+}}* [[OMP_PF_IV]], - // LAMBDA-DAG: [[OMP_PF_UB_VAL_4:%.+]] = load{{.+}}, {{.+}}* [[OMP_PF_UB]], - // LAMBDA: [[PF_CMP_IV_UB_2:%.+]] = icmp{{.+}} [[OMP_PF_IV_VAL_2]], [[OMP_PF_UB_VAL_4]] - // LAMBDA: br{{.+}} [[PF_CMP_IV_UB_2]], label %[[OMP_PF_INNER_LOOP_BODY:.+]], label %[[OMP_PF_INNER_LOOP_END:.+]] - - // LAMBDA: [[OMP_PF_INNER_LOOP_BODY]]: - // LAMBDA-DAG: {{.+}} = load{{.+}}, {{.+}}* [[OMP_PF_IV]], - // skip body branch - // LAMBDA: br{{.+}} - // LAMBDA: br label %[[OMP_PF_INNER_LOOP_INC:.+]] - - // IV = IV + 1 and inner loop latch - // LAMBDA: [[OMP_PF_INNER_LOOP_INC]]: - // LAMBDA-DAG: [[OMP_PF_IV_VAL_3:%.+]] = load{{.+}}, {{.+}}* [[OMP_IV]], - // LAMBDA-DAG: [[OMP_PF_NEXT_IV:%.+]] = add{{.+}} [[OMP_PF_IV_VAL_3]], 1 - // LAMBDA-DAG: store{{.+}} [[OMP_PF_NEXT_IV]], {{.+}}* [[OMP_IV]], - // LAMBDA: br label %[[OMP_PF_INNER_FOR_HEADER]] - - // check NextLB and NextUB - // LAMBDA: [[OMP_PF_INNER_LOOP_END]]: - // LAMBDA: br label %[[OMP_PF_OUTER_LOOP_INC:.+]] - - // LAMBDA: [[OMP_PF_OUTER_LOOP_INC]]: - // LAMBDA-DAG: [[OMP_PF_LB_VAL_2:%.+]] = load{{.+}}, {{.+}} [[OMP_PF_LB]], - // LAMBDA-DAG: [[OMP_PF_ST_VAL_1:%.+]] = load{{.+}}, {{.+}} [[OMP_PF_ST]], - // LAMBDA-DAG: [[OMP_PF_LB_NEXT:%.+]] = add{{.+}} [[OMP_PF_LB_VAL_2]], [[OMP_PF_ST_VAL_1]] - // LAMBDA: store{{.+}} [[OMP_PF_LB_NEXT]], {{.+}}* [[OMP_PF_LB]], - // LAMBDA-DAG: [[OMP_PF_UB_VAL_5:%.+]] = load{{.+}}, {{.+}} [[OMP_PF_UB]], - // LAMBDA-DAG: [[OMP_PF_ST_VAL_2:%.+]] = load{{.+}}, {{.+}} [[OMP_PF_ST]], - // LAMBDA-DAG: [[OMP_PF_UB_NEXT:%.+]] = add{{.+}} [[OMP_PF_UB_VAL_5]], [[OMP_PF_ST_VAL_2]] - // LAMBDA: store{{.+}} [[OMP_PF_UB_NEXT]], {{.+}}* [[OMP_PF_UB]], - // LAMBDA: br label %[[OMP_PF_OUTER_LOOP_HEADER]] - - // LAMBDA: [[OMP_PF_OUTER_LOOP_END]]: - // LAMBDA-DAG: call void @__kmpc_for_static_fini( - // LAMBDA: ret - [&]() { - a[i] = b[i] + c[i]; - }(); - } - - // schedule: dynamic no chunk - #pragma omp target - #pragma omp teams - // LAMBDA: define{{.+}} void [[OFFLOADING_FUN_6]]( - // LAMBDA: call {{.*}}void {{.+}} @__kmpc_fork_teams({{.+}}, i32 4, {{.+}}* [[OMP_OUTLINED_6:@.+]] to {{.+}}) - - #pragma omp distribute parallel for schedule(dynamic) - for (int i = 0; i < n; ++i) { - a[i] = b[i] + c[i]; - // LAMBDA: define{{.+}} void [[OMP_OUTLINED_6]]( - // LAMBDA: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, i32 92, - // LAMBDA: call{{.+}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}}[[OMP_PARFOR_OUTLINED_6:@.+]] to {{.+}}, - // skip rest of implementation of 'distribute' as it is tested above for default dist_schedule case - // LAMBDA: ret - - // 'parallel for' implementation using outer and inner loops and PrevEUB - // LAMBDA: define{{.+}} void [[OMP_PARFOR_OUTLINED_6]]({{.+}}, {{.+}}, i{{[0-9]+}} [[OMP_PREV_LB_IN:%.+]], i{{[0-9]+}} [[OMP_PREV_UB_IN:%.+]], {{.+}}, {{.+}}, {{.+}}, {{.+}}) - // LAMBDA-DAG: [[OMP_PF_LB:%.omp.lb]] = alloca{{.+}}, - // LAMBDA-DAG: [[OMP_PF_UB:%.omp.ub]] = alloca{{.+}}, - // LAMBDA-DAG: [[OMP_PF_IV:%.omp.iv]] = alloca{{.+}}, - // LAMBDA-DAG: [[OMP_PF_ST:%.omp.stride]] = alloca{{.+}}, - - // initialize lb and ub to PrevLB and PrevUB - // LAMBDA-DAG: store{{.+}} [[OMP_PREV_LB_IN]], {{.+}}* [[PREV_LB_ADDR:%.+]], - // LAMBDA-DAG: store{{.+}} [[OMP_PREV_UB_IN]], {{.+}}* [[PREV_UB_ADDR:%.+]], - // LAMBDA-DAG: [[OMP_PREV_LB_VAL:%.+]] = load{{.+}}, {{.+}}* [[PREV_LB_ADDR]], - // LAMBDA-64-DAG: [[OMP_PREV_LB_TRC:%.+]] = trunc{{.+}} [[OMP_PREV_LB_VAL]] to {{.+}} - // LAMBDA-DAG: [[OMP_PREV_UB_VAL:%.+]] = load{{.+}}, {{.+}}* [[PREV_UB_ADDR]], - // LAMBDA-64-DAG: [[OMP_PREV_UB_TRC:%.+]] = trunc{{.+}} [[OMP_PREV_UB_VAL]] to {{.+}} - // LAMBDA-64-DAG: store{{.+}} [[OMP_PREV_LB_TRC]], {{.+}}* [[OMP_PF_LB]], - // LAMBDA-64-DAG: store{{.+}} [[OMP_PREV_UB_TRC]], {{.+}}* [[OMP_PF_UB]], - // LAMBDA-32-DAG: store{{.+}} [[OMP_PREV_LB_VAL]], {{.+}}* [[OMP_PF_LB]], - // LAMBDA-32-DAG: store{{.+}} [[OMP_PREV_UB_VAL]], {{.+}}* [[OMP_PF_UB]], - // LAMBDA-DAG: [[OMP_PF_LB_VAL:%.+]] = load{{.+}}, {{.+}} [[OMP_PF_LB]], - // LAMBDA-DAG: [[OMP_PF_UB_VAL:%.+]] = load{{.+}}, {{.+}} [[OMP_PF_UB]], - // LAMBDA: call void @__kmpc_dispatch_init_4({{.+}}, {{.+}}, {{.+}} 35, {{.+}} [[OMP_PF_LB_VAL]], {{.+}} [[OMP_PF_UB_VAL]], {{.+}}, {{.+}}) - // LAMBDA: br label %[[OMP_PF_OUTER_LOOP_HEADER:.+]] - - // LAMBDA: [[OMP_PF_OUTER_LOOP_HEADER]]: - // LAMBDA: [[IS_FIN:%.+]] = call{{.+}} @__kmpc_dispatch_next_4({{.+}}, {{.+}}, {{.+}}, {{.+}}* [[OMP_PF_LB]], {{.+}}* [[OMP_PF_UB]], {{.+}}* [[OMP_PF_ST]]) - // LAMBDA: [[IS_FIN_CMP:%.+]] = icmp{{.+}} [[IS_FIN]], 0 - // LAMBDA: br{{.+}} [[IS_FIN_CMP]], label %[[OMP_PF_OUTER_LOOP_BODY:.+]], label %[[OMP_PF_OUTER_LOOP_END:.+]] - - // initialize omp.iv (IV = LB) - // LAMBDA: [[OMP_PF_OUTER_LOOP_BODY]]: - // LAMBDA-DAG: [[OMP_PF_LB_VAL_1:%.+]] = load{{.+}}, {{.+}} [[OMP_PF_LB]], - // LAMBDA-DAG: store {{.+}} [[OMP_PF_LB_VAL_1]], {{.+}}* [[OMP_PF_IV]], - // LAMBDA: br label %[[OMP_PF_INNER_LOOP_HEADER:.+]] - - // LAMBDA: [[OMP_PF_INNER_LOOP_HEADER]]: - // LAMBDA-DAG: [[OMP_PF_IV_VAL_2:%.+]] = load{{.+}}, {{.+}}* [[OMP_PF_IV]], - // LAMBDA-DAG: [[OMP_PF_UB_VAL_4:%.+]] = load{{.+}}, {{.+}}* [[OMP_PF_UB]], - // LAMBDA: [[PF_CMP_IV_UB_2:%.+]] = icmp{{.+}} [[OMP_PF_IV_VAL_2]], [[OMP_PF_UB_VAL_4]] - // LAMBDA: br{{.+}} [[PF_CMP_IV_UB_2]], label %[[OMP_PF_INNER_LOOP_BODY:.+]], label %[[OMP_PF_INNER_LOOP_END:.+]] - - // LAMBDA: [[OMP_PF_INNER_LOOP_BODY]]: - // LAMBDA-DAG: {{.+}} = load{{.+}}, {{.+}}* [[OMP_PF_IV]], - // skip body branch - // LAMBDA: br{{.+}} - // LAMBDA: br label %[[OMP_PF_INNER_LOOP_INC:.+]] - - // IV = IV + 1 and inner loop latch - // LAMBDA: [[OMP_PF_INNER_LOOP_INC]]: - // LAMBDA-DAG: [[OMP_PF_IV_VAL_3:%.+]] = load{{.+}}, {{.+}}* [[OMP_IV]], - // LAMBDA-DAG: [[OMP_PF_NEXT_IV:%.+]] = add{{.+}} [[OMP_PF_IV_VAL_3]], 1 - // LAMBDA-DAG: store{{.+}} [[OMP_PF_NEXT_IV]], {{.+}}* [[OMP_IV]], - // LAMBDA: br label %[[OMP_PF_INNER_FOR_HEADER]] - - // check NextLB and NextUB - // LAMBDA: [[OMP_PF_INNER_LOOP_END]]: - // LAMBDA: br label %[[OMP_PF_OUTER_LOOP_INC:.+]] - - // LAMBDA: [[OMP_PF_OUTER_LOOP_INC]]: - // LAMBDA: br label %[[OMP_PF_OUTER_LOOP_HEADER]] - - // LAMBDA: [[OMP_PF_OUTER_LOOP_END]]: - // LAMBDA: ret - [&]() { - a[i] = b[i] + c[i]; - }(); - } - - // schedule: dynamic chunk - #pragma omp target - #pragma omp teams - // LAMBDA: define{{.+}} void [[OFFLOADING_FUN_7]]( - // LAMBDA: call {{.*}}void {{.+}} @__kmpc_fork_teams({{.+}}, i32 5, {{.+}}* [[OMP_OUTLINED_7:@.+]] to {{.+}}) - - #pragma omp distribute parallel for schedule(dynamic, ch) - for (int i = 0; i < n; ++i) { - a[i] = b[i] + c[i]; - // LAMBDA: define{{.+}} void [[OMP_OUTLINED_7]]( - // LAMBDA: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, i32 92, - // LAMBDA: call{{.+}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}}[[OMP_PARFOR_OUTLINED_7:@.+]] to {{.+}}, - // skip rest of implementation of 'distribute' as it is tested above for default dist_schedule case - // LAMBDA: ret - - // 'parallel for' implementation using outer and inner loops and PrevEUB - // LAMBDA: define{{.+}} void [[OMP_PARFOR_OUTLINED_7]]({{.+}}, {{.+}}, i{{[0-9]+}} [[OMP_PREV_LB_IN:%.+]], i{{[0-9]+}} [[OMP_PREV_UB_IN:%.+]], {{.+}}, {{.+}}, {{.+}}, {{.+}}, {{.+}}) - // LAMBDA-DAG: [[OMP_PF_LB:%.omp.lb]] = alloca{{.+}}, - // LAMBDA-DAG: [[OMP_PF_UB:%.omp.ub]] = alloca{{.+}}, - // LAMBDA-DAG: [[OMP_PF_IV:%.omp.iv]] = alloca{{.+}}, - // LAMBDA-DAG: [[OMP_PF_ST:%.omp.stride]] = alloca{{.+}}, - - // initialize lb and ub to PrevLB and PrevUB - // LAMBDA-DAG: store{{.+}} [[OMP_PREV_LB_IN]], {{.+}}* [[PREV_LB_ADDR:%.+]], - // LAMBDA-DAG: store{{.+}} [[OMP_PREV_UB_IN]], {{.+}}* [[PREV_UB_ADDR:%.+]], - // LAMBDA-DAG: [[OMP_PREV_LB_VAL:%.+]] = load{{.+}}, {{.+}}* [[PREV_LB_ADDR]], - // LAMBDA-64-DAG: [[OMP_PREV_LB_TRC:%.+]] = trunc{{.+}} [[OMP_PREV_LB_VAL]] to {{.+}} - // LAMBDA-DAG: [[OMP_PREV_UB_VAL:%.+]] = load{{.+}}, {{.+}}* [[PREV_UB_ADDR]], - // LAMBDA-64-DAG: [[OMP_PREV_UB_TRC:%.+]] = trunc{{.+}} [[OMP_PREV_UB_VAL]] to {{.+}} - // LAMBDA-64-DAG: store{{.+}} [[OMP_PREV_LB_TRC]], {{.+}}* [[OMP_PF_LB]], - // LAMBDA-64-DAG: store{{.+}} [[OMP_PREV_UB_TRC]], {{.+}}* [[OMP_PF_UB]], - // LAMBDA-32-DAG: store{{.+}} [[OMP_PREV_LB_VAL]], {{.+}}* [[OMP_PF_LB]], - // LAMBDA-32-DAG: store{{.+}} [[OMP_PREV_UB_VAL]], {{.+}}* [[OMP_PF_UB]], - // LAMBDA-DAG: [[OMP_PF_LB_VAL:%.+]] = load{{.+}}, {{.+}} [[OMP_PF_LB]], - // LAMBDA-DAG: [[OMP_PF_UB_VAL:%.+]] = load{{.+}}, {{.+}} [[OMP_PF_UB]], - // LAMBDA: call void @__kmpc_dispatch_init_4({{.+}}, {{.+}}, {{.+}} 35, {{.+}} [[OMP_PF_LB_VAL]], {{.+}} [[OMP_PF_UB_VAL]], {{.+}}, {{.+}}) - // LAMBDA: br label %[[OMP_PF_OUTER_LOOP_HEADER:.+]] - - // LAMBDA: [[OMP_PF_OUTER_LOOP_HEADER]]: - // LAMBDA: [[IS_FIN:%.+]] = call{{.+}} @__kmpc_dispatch_next_4({{.+}}, {{.+}}, {{.+}}, {{.+}}* [[OMP_PF_LB]], {{.+}}* [[OMP_PF_UB]], {{.+}}* [[OMP_PF_ST]]) - // LAMBDA: [[IS_FIN_CMP:%.+]] = icmp{{.+}} [[IS_FIN]], 0 - // LAMBDA: br{{.+}} [[IS_FIN_CMP]], label %[[OMP_PF_OUTER_LOOP_BODY:.+]], label %[[OMP_PF_OUTER_LOOP_END:.+]] - - // initialize omp.iv (IV = LB) - // LAMBDA: [[OMP_PF_OUTER_LOOP_BODY]]: - // LAMBDA-DAG: [[OMP_PF_LB_VAL_1:%.+]] = load{{.+}}, {{.+}} [[OMP_PF_LB]], - // LAMBDA-DAG: store {{.+}} [[OMP_PF_LB_VAL_1]], {{.+}}* [[OMP_PF_IV]], - // LAMBDA: br label %[[OMP_PF_INNER_LOOP_HEADER:.+]] - - // LAMBDA: [[OMP_PF_INNER_LOOP_HEADER]]: - // LAMBDA-DAG: [[OMP_PF_IV_VAL_2:%.+]] = load{{.+}}, {{.+}}* [[OMP_PF_IV]], - // LAMBDA-DAG: [[OMP_PF_UB_VAL_4:%.+]] = load{{.+}}, {{.+}}* [[OMP_PF_UB]], - // LAMBDA: [[PF_CMP_IV_UB_2:%.+]] = icmp{{.+}} [[OMP_PF_IV_VAL_2]], [[OMP_PF_UB_VAL_4]] - // LAMBDA: br{{.+}} [[PF_CMP_IV_UB_2]], label %[[OMP_PF_INNER_LOOP_BODY:.+]], label %[[OMP_PF_INNER_LOOP_END:.+]] - - // LAMBDA: [[OMP_PF_INNER_LOOP_BODY]]: - // LAMBDA-DAG: {{.+}} = load{{.+}}, {{.+}}* [[OMP_PF_IV]], - // skip body branch - // LAMBDA: br{{.+}} - // LAMBDA: br label %[[OMP_PF_INNER_LOOP_INC:.+]] - - // IV = IV + 1 and inner loop latch - // LAMBDA: [[OMP_PF_INNER_LOOP_INC]]: - // LAMBDA-DAG: [[OMP_PF_IV_VAL_3:%.+]] = load{{.+}}, {{.+}}* [[OMP_IV]], - // LAMBDA-DAG: [[OMP_PF_NEXT_IV:%.+]] = add{{.+}} [[OMP_PF_IV_VAL_3]], 1 - // LAMBDA-DAG: store{{.+}} [[OMP_PF_NEXT_IV]], {{.+}}* [[OMP_IV]], - // LAMBDA: br label %[[OMP_PF_INNER_FOR_HEADER]] - - // check NextLB and NextUB - // LAMBDA: [[OMP_PF_INNER_LOOP_END]]: - // LAMBDA: br label %[[OMP_PF_OUTER_LOOP_INC:.+]] - - // LAMBDA: [[OMP_PF_OUTER_LOOP_INC]]: - // LAMBDA: br label %[[OMP_PF_OUTER_LOOP_HEADER]] - - // LAMBDA: [[OMP_PF_OUTER_LOOP_END]]: - // LAMBDA: ret - [&]() { - a[i] = b[i] + c[i]; - }(); - } - }(); - return 0; -#else - // CHECK-LABEL: @main - - // CHECK: call i{{[0-9]+}} @__tgt_target_teams( - // CHECK: call void [[OFFLOADING_FUN_1:@.+]]( - - // CHECK: call i{{[0-9]+}} @__tgt_target_teams( - // CHECK: call void [[OFFLOADING_FUN_2:@.+]]( - - // CHECK: call i{{[0-9]+}} @__tgt_target_teams( - // CHECK: call void [[OFFLOADING_FUN_3:@.+]]( - - // CHECK: call i{{[0-9]+}} @__tgt_target_teams( - // CHECK: call void [[OFFLOADING_FUN_4:@.+]]( - - // CHECK: call i{{[0-9]+}} @__tgt_target_teams( - // CHECK: call void [[OFFLOADING_FUN_5:@.+]]( - - // CHECK: call i{{[0-9]+}} @__tgt_target_teams( - // CHECK: call void [[OFFLOADING_FUN_6:@.+]]( - - // CHECK: call i{{[0-9]+}} @__tgt_target_teams( - // CHECK: call void [[OFFLOADING_FUN_7:@.+]]( - - // CHECK: call{{.+}} [[TMAIN:@.+]]() - - // no schedule clauses - #pragma omp target - #pragma omp teams - // CHECK: define internal void [[OFFLOADING_FUN_1]]( - // CHECK: call {{.*}}void {{.+}} @__kmpc_fork_teams({{.+}}, i32 4, {{.+}}* [[OMP_OUTLINED_1:@.+]] to {{.+}}) - - #pragma omp distribute parallel for - for (int i = 0; i < n; ++i) { - a[i] = b[i] + c[i]; - // CHECK: define{{.+}} void [[OMP_OUTLINED_1]]( - // CHECK-DAG: [[OMP_IV:%.omp.iv]] = alloca - // CHECK-DAG: [[OMP_LB:%.omp.comb.lb]] = alloca - // CHECK-DAG: [[OMP_UB:%.omp.comb.ub]] = alloca - // CHECK-DAG: [[OMP_ST:%.omp.stride]] = alloca - - // CHECK: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, i32 92, - - // check EUB for distribute - // CHECK-DAG: [[OMP_UB_VAL_1:%.+]] = load{{.+}} [[OMP_UB]], - // CHECK: [[NUM_IT_1:%.+]] = load{{.+}}, - // CHECK-DAG: [[CMP_UB_NUM_IT:%.+]] = icmp sgt {{.+}} [[OMP_UB_VAL_1]], [[NUM_IT_1]] - // CHECK: br {{.+}} [[CMP_UB_NUM_IT]], label %[[EUB_TRUE:.+]], label %[[EUB_FALSE:.+]] - // CHECK-DAG: [[EUB_TRUE]]: - // CHECK: [[NUM_IT_2:%.+]] = load{{.+}}, - // CHECK: br label %[[EUB_END:.+]] - // CHECK-DAG: [[EUB_FALSE]]: - // CHECK: [[OMP_UB_VAL2:%.+]] = load{{.+}} [[OMP_UB]], - // CHECK: br label %[[EUB_END]] - // CHECK-DAG: [[EUB_END]]: - // CHECK-DAG: [[EUB_RES:%.+]] = phi{{.+}} [ [[NUM_IT_2]], %[[EUB_TRUE]] ], [ [[OMP_UB_VAL2]], %[[EUB_FALSE]] ] - // CHECK: store{{.+}} [[EUB_RES]], {{.+}}* [[OMP_UB]], - - // initialize omp.iv - // CHECK: [[OMP_LB_VAL_1:%.+]] = load{{.+}}, {{.+}}* [[OMP_LB]], - // CHECK: store {{.+}} [[OMP_LB_VAL_1]], {{.+}}* [[OMP_IV]], - // CHECK: br label %[[OMP_JUMP_BACK:.+]] - - // check exit condition - // CHECK: [[OMP_JUMP_BACK]]: - // CHECK-DAG: [[OMP_IV_VAL_1:%.+]] = load {{.+}} [[OMP_IV]], - // CHECK-DAG: [[OMP_UB_VAL_3:%.+]] = load {{.+}} [[OMP_UB]], - // CHECK: [[CMP_IV_UB:%.+]] = icmp sle {{.+}} [[OMP_IV_VAL_1]], [[OMP_UB_VAL_3]] - // CHECK: br {{.+}} [[CMP_IV_UB]], label %[[DIST_BODY:.+]], label %[[DIST_END:.+]] - - // check that PrevLB and PrevUB are passed to the 'for' - // CHECK: [[DIST_BODY]]: - // CHECK-DAG: [[OMP_PREV_LB:%.+]] = load {{.+}}, {{.+}} [[OMP_LB]], - // CHECK-64-DAG: [[OMP_PREV_LB_EXT:%.+]] = zext {{.+}} [[OMP_PREV_LB]] to {{.+}} - // CHECK-DAG: [[OMP_PREV_UB:%.+]] = load {{.+}}, {{.+}} [[OMP_UB]], - // CHECK-64-DAG: [[OMP_PREV_UB_EXT:%.+]] = zext {{.+}} [[OMP_PREV_UB]] to {{.+}} - // check that distlb and distub are properly passed to fork_call - // CHECK-64: call{{.+}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}}[[OMP_PARFOR_OUTLINED_1:@.+]] to {{.+}}, i{{[0-9]+}} [[OMP_PREV_LB_EXT]], i{{[0-9]+}} [[OMP_PREV_UB_EXT]], {{.+}}) - // CHECK-32: call{{.+}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}}[[OMP_PARFOR_OUTLINED_1:@.+]] to {{.+}}, i{{[0-9]+}} [[OMP_PREV_LB]], i{{[0-9]+}} [[OMP_PREV_UB]], {{.+}}) - // CHECK: br label %[[DIST_INC:.+]] - - // increment by stride (distInc - 'parallel for' executes the whole chunk) and latch - // CHECK: [[DIST_INC]]: - // CHECK-DAG: [[OMP_IV_VAL_2:%.+]] = load {{.+}}, {{.+}}* [[OMP_IV]], - // CHECK-DAG: [[OMP_ST_VAL_1:%.+]] = load {{.+}}, {{.+}}* [[OMP_ST]], - // CHECK: [[OMP_IV_INC:%.+]] = add{{.+}} [[OMP_IV_VAL_2]], [[OMP_ST_VAL_1]] - // CHECK: store{{.+}} [[OMP_IV_INC]], {{.+}}* [[OMP_IV]], - // CHECK: br label %[[OMP_JUMP_BACK]] - - // CHECK-DAG: call void @__kmpc_for_static_fini( - // CHECK: ret - - // implementation of 'parallel for' - // CHECK: define{{.+}} void [[OMP_PARFOR_OUTLINED_1]]({{.+}}, {{.+}}, i{{[0-9]+}} [[OMP_PREV_LB_IN:%.+]], i{{[0-9]+}} [[OMP_PREV_UB_IN:%.+]], {{.+}}, {{.+}}, {{.+}}, {{.+}}) - - // CHECK-DAG: [[OMP_PF_LB:%.omp.lb]] = alloca{{.+}}, - // CHECK-DAG: [[OMP_PF_UB:%.omp.ub]] = alloca{{.+}}, - // CHECK-DAG: [[OMP_PF_IV:%.omp.iv]] = alloca{{.+}}, - - // initialize lb and ub to PrevLB and PrevUB - // CHECK-DAG: store{{.+}} [[OMP_PREV_LB_IN]], {{.+}}* [[PREV_LB_ADDR:%.+]], - // CHECK-DAG: store{{.+}} [[OMP_PREV_UB_IN]], {{.+}}* [[PREV_UB_ADDR:%.+]], - // CHECK-DAG: [[OMP_PREV_LB_VAL:%.+]] = load{{.+}}, {{.+}}* [[PREV_LB_ADDR]], - // CHECK-64-DAG: [[OMP_PREV_LB_TRC:%.+]] = trunc{{.+}} [[OMP_PREV_LB_VAL]] to {{.+}} - // CHECK-DAG: [[OMP_PREV_UB_VAL:%.+]] = load{{.+}}, {{.+}}* [[PREV_UB_ADDR]], - // CHECK-64-DAG: [[OMP_PREV_UB_TRC:%.+]] = trunc{{.+}} [[OMP_PREV_UB_VAL]] to {{.+}} - // CHECK-64-DAG: store{{.+}} [[OMP_PREV_LB_TRC]], {{.+}}* [[OMP_PF_LB]], - // CHECK-32-DAG: store{{.+}} [[OMP_PREV_LB_VAL]], {{.+}}* [[OMP_PF_LB]], - // CHECK-64-DAG: store{{.+}} [[OMP_PREV_UB_TRC]], {{.+}}* [[OMP_PF_UB]], - // CHECK-32-DAG: store{{.+}} [[OMP_PREV_UB_VAL]], {{.+}}* [[OMP_PF_UB]], - // CHECK: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, {{.+}} 34, {{.+}}, {{.+}}* [[OMP_PF_LB]], {{.+}}* [[OMP_PF_UB]],{{.+}}) - - // PrevEUB is only used when 'for' has a chunked schedule, otherwise EUB is used - // In this case we use EUB - // CHECK-DAG: [[OMP_PF_UB_VAL_1:%.+]] = load{{.+}} [[OMP_PF_UB]], - // CHECK: [[PF_NUM_IT_1:%.+]] = load{{.+}}, - // CHECK-DAG: [[PF_CMP_UB_NUM_IT:%.+]] = icmp{{.+}} [[OMP_PF_UB_VAL_1]], [[PF_NUM_IT_1]] - // CHECK: br i1 [[PF_CMP_UB_NUM_IT]], label %[[PF_EUB_TRUE:.+]], label %[[PF_EUB_FALSE:.+]] - // CHECK: [[PF_EUB_TRUE]]: - // CHECK: [[PF_NUM_IT_2:%.+]] = load{{.+}}, - // CHECK: br label %[[PF_EUB_END:.+]] - // CHECK-DAG: [[PF_EUB_FALSE]]: - // CHECK: [[OMP_PF_UB_VAL2:%.+]] = load{{.+}} [[OMP_PF_UB]], - // CHECK: br label %[[PF_EUB_END]] - // CHECK-DAG: [[PF_EUB_END]]: - // CHECK-DAG: [[PF_EUB_RES:%.+]] = phi{{.+}} [ [[PF_NUM_IT_2]], %[[PF_EUB_TRUE]] ], [ [[OMP_PF_UB_VAL2]], %[[PF_EUB_FALSE]] ] - // CHECK: store{{.+}} [[PF_EUB_RES]],{{.+}} [[OMP_PF_UB]], - - // initialize omp.iv - // CHECK: [[OMP_PF_LB_VAL_1:%.+]] = load{{.+}}, {{.+}} [[OMP_PF_LB]], - // CHECK: store {{.+}} [[OMP_PF_LB_VAL_1]], {{.+}}* [[OMP_PF_IV]], - // CHECK: br label %[[OMP_PF_JUMP_BACK:.+]] - - // check exit condition - // CHECK: [[OMP_PF_JUMP_BACK]]: - // CHECK-DAG: [[OMP_PF_IV_VAL_1:%.+]] = load {{.+}} [[OMP_PF_IV]], - // CHECK-DAG: [[OMP_PF_UB_VAL_3:%.+]] = load {{.+}} [[OMP_PF_UB]], - // CHECK: [[PF_CMP_IV_UB:%.+]] = icmp sle {{.+}} [[OMP_PF_IV_VAL_1]], [[OMP_PF_UB_VAL_3]] - // CHECK: br {{.+}} [[PF_CMP_IV_UB]], label %[[PF_BODY:.+]], label %[[PF_END:.+]] - - // check that PrevLB and PrevUB are passed to the 'for' - // CHECK: [[PF_BODY]]: - // CHECK-DAG: {{.+}} = load{{.+}}, {{.+}}* [[OMP_PF_IV]], - // CHECK: br label {{.+}} - - // check stride 1 for 'for' in 'distribute parallel for' - // CHECK-DAG: [[OMP_PF_IV_VAL_2:%.+]] = load {{.+}}, {{.+}}* [[OMP_PF_IV]], - // CHECK: [[OMP_PF_IV_INC:%.+]] = add{{.+}} [[OMP_PF_IV_VAL_2]], 1 - // CHECK: store{{.+}} [[OMP_PF_IV_INC]], {{.+}}* [[OMP_PF_IV]], - // CHECK: br label %[[OMP_PF_JUMP_BACK]] - - // CHECK-DAG: call void @__kmpc_for_static_fini( - // CHECK: ret - } - - // dist_schedule: static no chunk - #pragma omp target - #pragma omp teams - // CHECK: define{{.+}} void [[OFFLOADING_FUN_2]]( - // CHECK: call {{.*}}void {{.+}} @__kmpc_fork_teams({{.+}}, i32 4, {{.+}}* [[OMP_OUTLINED_2:@.+]] to {{.+}}) - - #pragma omp distribute parallel for dist_schedule(static) - for (int i = 0; i < n; ++i) { - a[i] = b[i] + c[i]; - // CHECK: define{{.+}} void [[OMP_OUTLINED_2]]( - // CHECK-DAG: [[OMP_IV:%.omp.iv]] = alloca - // CHECK-DAG: [[OMP_LB:%.omp.comb.lb]] = alloca - // CHECK-DAG: [[OMP_UB:%.omp.comb.ub]] = alloca - // CHECK-DAG: [[OMP_ST:%.omp.stride]] = alloca - - // CHECK: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, i32 92, - - // check EUB for distribute - // CHECK-DAG: [[OMP_UB_VAL_1:%.+]] = load{{.+}} [[OMP_UB]], - // CHECK: [[NUM_IT_1:%.+]] = load{{.+}}, - // CHECK-DAG: [[CMP_UB_NUM_IT:%.+]] = icmp sgt {{.+}} [[OMP_UB_VAL_1]], [[NUM_IT_1]] - // CHECK: br {{.+}} [[CMP_UB_NUM_IT]], label %[[EUB_TRUE:.+]], label %[[EUB_FALSE:.+]] - // CHECK-DAG: [[EUB_TRUE]]: - // CHECK: [[NUM_IT_2:%.+]] = load{{.+}}, - // CHECK: br label %[[EUB_END:.+]] - // CHECK-DAG: [[EUB_FALSE]]: - // CHECK: [[OMP_UB_VAL2:%.+]] = load{{.+}} [[OMP_UB]], - // CHECK: br label %[[EUB_END]] - // CHECK-DAG: [[EUB_END]]: - // CHECK-DAG: [[EUB_RES:%.+]] = phi{{.+}} [ [[NUM_IT_2]], %[[EUB_TRUE]] ], [ [[OMP_UB_VAL2]], %[[EUB_FALSE]] ] - // CHECK: store{{.+}} [[EUB_RES]], {{.+}}* [[OMP_UB]], - - // initialize omp.iv - // CHECK: [[OMP_LB_VAL_1:%.+]] = load{{.+}}, {{.+}}* [[OMP_LB]], - // CHECK: store {{.+}} [[OMP_LB_VAL_1]], {{.+}}* [[OMP_IV]], - // CHECK: br label %[[OMP_JUMP_BACK:.+]] - - // check exit condition - // CHECK: [[OMP_JUMP_BACK]]: - // CHECK-DAG: [[OMP_IV_VAL_1:%.+]] = load {{.+}} [[OMP_IV]], - // CHECK-DAG: [[OMP_UB_VAL_3:%.+]] = load {{.+}} [[OMP_UB]], - // CHECK: [[CMP_IV_UB:%.+]] = icmp sle {{.+}} [[OMP_IV_VAL_1]], [[OMP_UB_VAL_3]] - // CHECK: br {{.+}} [[CMP_IV_UB]], label %[[DIST_BODY:.+]], label %[[DIST_END:.+]] - - // check that PrevLB and PrevUB are passed to the 'for' - // CHECK: [[DIST_BODY]]: - // CHECK-DAG: [[OMP_PREV_LB:%.+]] = load {{.+}}, {{.+}} [[OMP_LB]], - // CHECK-64-DAG: [[OMP_PREV_LB_EXT:%.+]] = zext {{.+}} [[OMP_PREV_LB]] to {{.+}} - // CHECK-DAG: [[OMP_PREV_UB:%.+]] = load {{.+}}, {{.+}} [[OMP_UB]], - // CHECK-64-DAG: [[OMP_PREV_UB_EXT:%.+]] = zext {{.+}} [[OMP_PREV_UB]] to {{.+}} - // check that distlb and distub are properly passed to fork_call - // CHECK-64: call{{.+}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}}[[OMP_PARFOR_OUTLINED_2:@.+]] to {{.+}}, i{{[0-9]+}} [[OMP_PREV_LB_EXT]], i{{[0-9]+}} [[OMP_PREV_UB_EXT]], {{.+}}) - // CHECK-32: call{{.+}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}}[[OMP_PARFOR_OUTLINED_2:@.+]] to {{.+}}, i{{[0-9]+}} [[OMP_PREV_LB]], i{{[0-9]+}} [[OMP_PREV_UB]], {{.+}}) - // CHECK: br label %[[DIST_INC:.+]] - - // increment by stride (distInc - 'parallel for' executes the whole chunk) and latch - // CHECK: [[DIST_INC]]: - // CHECK-DAG: [[OMP_IV_VAL_2:%.+]] = load {{.+}}, {{.+}}* [[OMP_IV]], - // CHECK-DAG: [[OMP_ST_VAL_1:%.+]] = load {{.+}}, {{.+}}* [[OMP_ST]], - // CHECK: [[OMP_IV_INC:%.+]] = add{{.+}} [[OMP_IV_VAL_2]], [[OMP_ST_VAL_1]] - // CHECK: store{{.+}} [[OMP_IV_INC]], {{.+}}* [[OMP_IV]], - // CHECK: br label %[[OMP_JUMP_BACK]] - - // CHECK-DAG: call void @__kmpc_for_static_fini( - // CHECK: ret - - // implementation of 'parallel for' - // CHECK: define{{.+}} void [[OMP_PARFOR_OUTLINED_2]]({{.+}}, {{.+}}, i{{[0-9]+}} [[OMP_PREV_LB_IN:%.+]], i{{[0-9]+}} [[OMP_PREV_UB_IN:%.+]], {{.+}}, {{.+}}, {{.+}}, {{.+}}) - - // CHECK-DAG: [[OMP_PF_LB:%.omp.lb]] = alloca{{.+}}, - // CHECK-DAG: [[OMP_PF_UB:%.omp.ub]] = alloca{{.+}}, - // CHECK-DAG: [[OMP_PF_IV:%.omp.iv]] = alloca{{.+}}, - - // initialize lb and ub to PrevLB and PrevUB - // CHECK-DAG: store{{.+}} [[OMP_PREV_LB_IN]], {{.+}}* [[PREV_LB_ADDR:%.+]], - // CHECK-DAG: store{{.+}} [[OMP_PREV_UB_IN]], {{.+}}* [[PREV_UB_ADDR:%.+]], - // CHECK-DAG: [[OMP_PREV_LB_VAL:%.+]] = load{{.+}}, {{.+}}* [[PREV_LB_ADDR]], - // CHECK-64-DAG: [[OMP_PREV_LB_TRC:%.+]] = trunc{{.+}} [[OMP_PREV_LB_VAL]] to {{.+}} - // CHECK-DAG: [[OMP_PREV_UB_VAL:%.+]] = load{{.+}}, {{.+}}* [[PREV_UB_ADDR]], - // CHECK-64-DAG: [[OMP_PREV_UB_TRC:%.+]] = trunc{{.+}} [[OMP_PREV_UB_VAL]] to {{.+}} - // CHECK-64-DAG: store{{.+}} [[OMP_PREV_LB_TRC]], {{.+}}* [[OMP_PF_LB]], - // CHECK-32-DAG: store{{.+}} [[OMP_PREV_LB_VAL]], {{.+}}* [[OMP_PF_LB]], - // CHECK-64-DAG: store{{.+}} [[OMP_PREV_UB_TRC]], {{.+}}* [[OMP_PF_UB]], - // CHECK-32-DAG: store{{.+}} [[OMP_PREV_UB_VAL]], {{.+}}* [[OMP_PF_UB]], - // CHECK: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, {{.+}} 34, {{.+}}, {{.+}}* [[OMP_PF_LB]], {{.+}}* [[OMP_PF_UB]],{{.+}}) - - // PrevEUB is only used when 'for' has a chunked schedule, otherwise EUB is used - // In this case we use EUB - // CHECK-DAG: [[OMP_PF_UB_VAL_1:%.+]] = load{{.+}} [[OMP_PF_UB]], - // CHECK: [[PF_NUM_IT_1:%.+]] = load{{.+}}, - // CHECK-DAG: [[PF_CMP_UB_NUM_IT:%.+]] = icmp{{.+}} [[OMP_PF_UB_VAL_1]], [[PF_NUM_IT_1]] - // CHECK: br i1 [[PF_CMP_UB_NUM_IT]], label %[[PF_EUB_TRUE:.+]], label %[[PF_EUB_FALSE:.+]] - // CHECK: [[PF_EUB_TRUE]]: - // CHECK: [[PF_NUM_IT_2:%.+]] = load{{.+}}, - // CHECK: br label %[[PF_EUB_END:.+]] - // CHECK-DAG: [[PF_EUB_FALSE]]: - // CHECK: [[OMP_PF_UB_VAL2:%.+]] = load{{.+}} [[OMP_PF_UB]], - // CHECK: br label %[[PF_EUB_END]] - // CHECK-DAG: [[PF_EUB_END]]: - // CHECK-DAG: [[PF_EUB_RES:%.+]] = phi{{.+}} [ [[PF_NUM_IT_2]], %[[PF_EUB_TRUE]] ], [ [[OMP_PF_UB_VAL2]], %[[PF_EUB_FALSE]] ] - // CHECK: store{{.+}} [[PF_EUB_RES]],{{.+}} [[OMP_PF_UB]], - - // initialize omp.iv - // CHECK: [[OMP_PF_LB_VAL_1:%.+]] = load{{.+}}, {{.+}} [[OMP_PF_LB]], - // CHECK: store {{.+}} [[OMP_PF_LB_VAL_1]], {{.+}}* [[OMP_PF_IV]], - // CHECK: br label %[[OMP_PF_JUMP_BACK:.+]] - - // check exit condition - // CHECK: [[OMP_PF_JUMP_BACK]]: - // CHECK-DAG: [[OMP_PF_IV_VAL_1:%.+]] = load {{.+}} [[OMP_PF_IV]], - // CHECK-DAG: [[OMP_PF_UB_VAL_3:%.+]] = load {{.+}} [[OMP_PF_UB]], - // CHECK: [[PF_CMP_IV_UB:%.+]] = icmp sle {{.+}} [[OMP_PF_IV_VAL_1]], [[OMP_PF_UB_VAL_3]] - // CHECK: br {{.+}} [[PF_CMP_IV_UB]], label %[[PF_BODY:.+]], label %[[PF_END:.+]] - - // check that PrevLB and PrevUB are passed to the 'for' - // CHECK: [[PF_BODY]]: - // CHECK-DAG: {{.+}} = load{{.+}}, {{.+}}* [[OMP_PF_IV]], - // CHECK: br label {{.+}} - - // check stride 1 for 'for' in 'distribute parallel for' - // CHECK-DAG: [[OMP_PF_IV_VAL_2:%.+]] = load {{.+}}, {{.+}}* [[OMP_PF_IV]], - // CHECK: [[OMP_PF_IV_INC:%.+]] = add{{.+}} [[OMP_PF_IV_VAL_2]], 1 - // CHECK: store{{.+}} [[OMP_PF_IV_INC]], {{.+}}* [[OMP_PF_IV]], - // CHECK: br label %[[OMP_PF_JUMP_BACK]] - - // CHECK-DAG: call void @__kmpc_for_static_fini( - // CHECK: ret - } - - // dist_schedule: static chunk - #pragma omp target - #pragma omp teams - // CHECK: define{{.+}} void [[OFFLOADING_FUN_3]]( - // CHECK: call {{.*}}void {{.+}} @__kmpc_fork_teams({{.+}}, i32 5, {{.+}}* [[OMP_OUTLINED_3:@.+]] to {{.+}}) - - #pragma omp distribute parallel for dist_schedule(static, ch) - for (int i = 0; i < n; ++i) { - a[i] = b[i] + c[i]; - // CHECK: define{{.+}} void [[OMP_OUTLINED_3]]( - // CHECK-DAG: [[OMP_IV:%.omp.iv]] = alloca - // CHECK-DAG: [[OMP_LB:%.omp.comb.lb]] = alloca - // CHECK-DAG: [[OMP_UB:%.omp.comb.ub]] = alloca - // CHECK-DAG: [[OMP_ST:%.omp.stride]] = alloca - - // unlike the previous tests, in this one we have a outer and inner loop for 'distribute' - // CHECK: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, i32 91, - // CHECK: br label %[[DIST_OUTER_LOOP_HEADER:.+]] - - // CHECK: [[DIST_OUTER_LOOP_HEADER]]: - // check EUB for distribute - // CHECK-DAG: [[OMP_UB_VAL_1:%.+]] = load{{.+}} [[OMP_UB]], - // CHECK: [[NUM_IT_1:%.+]] = load{{.+}}, - // CHECK-DAG: [[CMP_UB_NUM_IT:%.+]] = icmp sgt {{.+}} [[OMP_UB_VAL_1]], [[NUM_IT_1]] - // CHECK: br {{.+}} [[CMP_UB_NUM_IT]], label %[[EUB_TRUE:.+]], label %[[EUB_FALSE:.+]] - // CHECK-DAG: [[EUB_TRUE]]: - // CHECK: [[NUM_IT_2:%.+]] = load{{.+}}, - // CHECK: br label %[[EUB_END:.+]] - // CHECK-DAG: [[EUB_FALSE]]: - // CHECK: [[OMP_UB_VAL2:%.+]] = load{{.+}} [[OMP_UB]], - // CHECK: br label %[[EUB_END]] - // CHECK-DAG: [[EUB_END]]: - // CHECK-DAG: [[EUB_RES:%.+]] = phi{{.+}} [ [[NUM_IT_2]], %[[EUB_TRUE]] ], [ [[OMP_UB_VAL2]], %[[EUB_FALSE]] ] - // CHECK: store{{.+}} [[EUB_RES]], {{.+}}* [[OMP_UB]], - - // initialize omp.iv - // CHECK: [[OMP_LB_VAL_1:%.+]] = load{{.+}}, {{.+}}* [[OMP_LB]], - // CHECK: store {{.+}} [[OMP_LB_VAL_1]], {{.+}}* [[OMP_IV]], - - // check exit condition - // CHECK-DAG: [[OMP_IV_VAL_1:%.+]] = load {{.+}} [[OMP_IV]], - // CHECK-DAG: [[OMP_UB_VAL_3:%.+]] = load {{.+}} [[OMP_UB]], - // CHECK: [[CMP_IV_UB:%.+]] = icmp sle {{.+}} [[OMP_IV_VAL_1]], [[OMP_UB_VAL_3]] - // CHECK: br {{.+}} [[CMP_IV_UB]], label %[[DIST_OUTER_LOOP_BODY:.+]], label %[[DIST_OUTER_LOOP_END:.+]] - - // CHECK: [[DIST_OUTER_LOOP_BODY]]: - // CHECK: br label %[[DIST_INNER_LOOP_HEADER:.+]] - - // CHECK: [[DIST_INNER_LOOP_HEADER]]: - // CHECK-DAG: [[OMP_IV_VAL_2:%.+]] = load {{.+}} [[OMP_IV]], - // CHECK-DAG: [[OMP_UB_VAL_4:%.+]] = load {{.+}} [[OMP_UB]], - // CHECK: [[CMP_IV_UB_2:%.+]] = icmp sle {{.+}} [[OMP_IV_VAL_2]], [[OMP_UB_VAL_4]] - // CHECK: br{{.+}} [[CMP_IV_UB_2]], label %[[DIST_INNER_LOOP_BODY:.+]], label %[[DIST_INNER_LOOP_END:.+]] - - // check that PrevLB and PrevUB are passed to the 'for' - // CHECK: [[DIST_INNER_LOOP_BODY]]: - // CHECK-DAG: [[OMP_PREV_LB:%.+]] = load {{.+}}, {{.+}} [[OMP_LB]], - // CHECK-64-DAG: [[OMP_PREV_LB_EXT:%.+]] = zext {{.+}} [[OMP_PREV_LB]] to {{.+}} - // CHECK-DAG: [[OMP_PREV_UB:%.+]] = load {{.+}}, {{.+}} [[OMP_UB]], - // CHECK-64-DAG: [[OMP_PREV_UB_EXT:%.+]] = zext {{.+}} [[OMP_PREV_UB]] to {{.+}} - // check that distlb and distub are properly passed to fork_call - // CHECK-64: call{{.+}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}}[[OMP_PARFOR_OUTLINED_3:@.+]] to {{.+}}, i{{[0-9]+}} [[OMP_PREV_LB_EXT]], i{{[0-9]+}} [[OMP_PREV_UB_EXT]], {{.+}}) - // CHECK-32: call{{.+}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}}[[OMP_PARFOR_OUTLINED_3:@.+]] to {{.+}}, i{{[0-9]+}} [[OMP_PREV_LB]], i{{[0-9]+}} [[OMP_PREV_UB]], {{.+}}) - // CHECK: br label %[[DIST_INNER_LOOP_INC:.+]] - - // check DistInc - // CHECK: [[DIST_INNER_LOOP_INC]]: - // CHECK-DAG: [[OMP_IV_VAL_3:%.+]] = load {{.+}}, {{.+}}* [[OMP_IV]], - // CHECK-DAG: [[OMP_ST_VAL_1:%.+]] = load {{.+}}, {{.+}}* [[OMP_ST]], - // CHECK: [[OMP_IV_INC:%.+]] = add{{.+}} [[OMP_IV_VAL_3]], [[OMP_ST_VAL_1]] - // CHECK: store{{.+}} [[OMP_IV_INC]], {{.+}}* [[OMP_IV]], - // CHECK: br label %[[DIST_INNER_LOOP_HEADER]] - - // CHECK: [[DIST_INNER_LOOP_END]]: - // CHECK: br label %[[DIST_OUTER_LOOP_INC:.+]] - - // CHECK: [[DIST_OUTER_LOOP_INC]]: - // check NextLB and NextUB - // CHECK-DAG: [[OMP_LB_VAL_2:%.+]] = load{{.+}}, {{.+}} [[OMP_LB]], - // CHECK-DAG: [[OMP_ST_VAL_2:%.+]] = load{{.+}}, {{.+}} [[OMP_ST]], - // CHECK-DAG: [[OMP_LB_NEXT:%.+]] = add{{.+}} [[OMP_LB_VAL_2]], [[OMP_ST_VAL_2]] - // CHECK: store{{.+}} [[OMP_LB_NEXT]], {{.+}}* [[OMP_LB]], - // CHECK-DAG: [[OMP_UB_VAL_5:%.+]] = load{{.+}}, {{.+}} [[OMP_UB]], - // CHECK-DAG: [[OMP_ST_VAL_3:%.+]] = load{{.+}}, {{.+}} [[OMP_ST]], - // CHECK-DAG: [[OMP_UB_NEXT:%.+]] = add{{.+}} [[OMP_UB_VAL_5]], [[OMP_ST_VAL_3]] - // CHECK: store{{.+}} [[OMP_UB_NEXT]], {{.+}}* [[OMP_UB]], - // CHECK: br label %[[DIST_OUTER_LOOP_HEADER]] - - // outer loop exit - // CHECK: [[DIST_OUTER_LOOP_END]]: - // CHECK-DAG: call void @__kmpc_for_static_fini( - // CHECK: ret - - // skip implementation of 'parallel for': using default scheduling and was tested above - } - - // schedule: static no chunk - #pragma omp target - #pragma omp teams - // CHECK: define{{.+}} void [[OFFLOADING_FUN_4]]( - // CHECK: call {{.*}}void {{.+}} @__kmpc_fork_teams({{.+}}, i32 4, {{.+}}* [[OMP_OUTLINED_4:@.+]] to {{.+}}) - - #pragma omp distribute parallel for schedule(static) - for (int i = 0; i < n; ++i) { - a[i] = b[i] + c[i]; - // CHECK: define{{.+}} void [[OMP_OUTLINED_4]]( - // CHECK-DAG: [[OMP_IV:%.omp.iv]] = alloca - // CHECK-DAG: [[OMP_LB:%.omp.comb.lb]] = alloca - // CHECK-DAG: [[OMP_UB:%.omp.comb.ub]] = alloca - // CHECK-DAG: [[OMP_ST:%.omp.stride]] = alloca - - // CHECK: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, i32 92, - // CHECK: call{{.+}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}}[[OMP_PARFOR_OUTLINED_4:@.+]] to {{.+}}, - // skip rest of implementation of 'distribute' as it is tested above for default dist_schedule case - // CHECK: ret - - // 'parallel for' implementation is the same as the case without schedule clase (static no chunk is the default) - // CHECK: define{{.+}} void [[OMP_PARFOR_OUTLINED_4]]({{.+}}, {{.+}}, i{{[0-9]+}} [[OMP_PREV_LB_IN:%.+]], i{{[0-9]+}} [[OMP_PREV_UB_IN:%.+]], {{.+}}, {{.+}}, {{.+}}, {{.+}}) - - // CHECK-DAG: [[OMP_PF_LB:%.omp.lb]] = alloca{{.+}}, - // CHECK-DAG: [[OMP_PF_UB:%.omp.ub]] = alloca{{.+}}, - // CHECK-DAG: [[OMP_PF_IV:%.omp.iv]] = alloca{{.+}}, - - // initialize lb and ub to PrevLB and PrevUB - // CHECK-DAG: store{{.+}} [[OMP_PREV_LB_IN]], {{.+}}* [[PREV_LB_ADDR:%.+]], - // CHECK-DAG: store{{.+}} [[OMP_PREV_UB_IN]], {{.+}}* [[PREV_UB_ADDR:%.+]], - // CHECK-DAG: [[OMP_PREV_LB_VAL:%.+]] = load{{.+}}, {{.+}}* [[PREV_LB_ADDR]], - // CHECK-64-DAG: [[OMP_PREV_LB_TRC:%.+]] = trunc{{.+}} [[OMP_PREV_LB_VAL]] to {{.+}} - // CHECK-DAG: [[OMP_PREV_UB_VAL:%.+]] = load{{.+}}, {{.+}}* [[PREV_UB_ADDR]], - // CHECK-64-DAG: [[OMP_PREV_UB_TRC:%.+]] = trunc{{.+}} [[OMP_PREV_UB_VAL]] to {{.+}} - // CHECK-64-DAG: store{{.+}} [[OMP_PREV_LB_TRC]], {{.+}}* [[OMP_PF_LB]], - // CHECK-32-DAG: store{{.+}} [[OMP_PREV_LB_VAL]], {{.+}}* [[OMP_PF_LB]], - // CHECK-64-DAG: store{{.+}} [[OMP_PREV_UB_TRC]], {{.+}}* [[OMP_PF_UB]], - // CHECK-32-DAG: store{{.+}} [[OMP_PREV_UB_VAL]], {{.+}}* [[OMP_PF_UB]], - // CHECK: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, {{.+}} 34, {{.+}}, {{.+}}* [[OMP_PF_LB]], {{.+}}* [[OMP_PF_UB]],{{.+}}) - - // PrevEUB is only used when 'for' has a chunked schedule, otherwise EUB is used - // In this case we use EUB - // CHECK-DAG: [[OMP_PF_UB_VAL_1:%.+]] = load{{.+}} [[OMP_PF_UB]], - // CHECK: [[PF_NUM_IT_1:%.+]] = load{{.+}}, - // CHECK-DAG: [[PF_CMP_UB_NUM_IT:%.+]] = icmp{{.+}} [[OMP_PF_UB_VAL_1]], [[PF_NUM_IT_1]] - // CHECK: br i1 [[PF_CMP_UB_NUM_IT]], label %[[PF_EUB_TRUE:.+]], label %[[PF_EUB_FALSE:.+]] - // CHECK: [[PF_EUB_TRUE]]: - // CHECK: [[PF_NUM_IT_2:%.+]] = load{{.+}}, - // CHECK: br label %[[PF_EUB_END:.+]] - // CHECK-DAG: [[PF_EUB_FALSE]]: - // CHECK: [[OMP_PF_UB_VAL2:%.+]] = load{{.+}} [[OMP_PF_UB]], - // CHECK: br label %[[PF_EUB_END]] - // CHECK-DAG: [[PF_EUB_END]]: - // CHECK-DAG: [[PF_EUB_RES:%.+]] = phi{{.+}} [ [[PF_NUM_IT_2]], %[[PF_EUB_TRUE]] ], [ [[OMP_PF_UB_VAL2]], %[[PF_EUB_FALSE]] ] - // CHECK: store{{.+}} [[PF_EUB_RES]],{{.+}} [[OMP_PF_UB]], - - // initialize omp.iv - // CHECK: [[OMP_PF_LB_VAL_1:%.+]] = load{{.+}}, {{.+}} [[OMP_PF_LB]], - // CHECK: store {{.+}} [[OMP_PF_LB_VAL_1]], {{.+}}* [[OMP_PF_IV]], - // CHECK: br label %[[OMP_PF_JUMP_BACK:.+]] - - // check exit condition - // CHECK: [[OMP_PF_JUMP_BACK]]: - // CHECK-DAG: [[OMP_PF_IV_VAL_1:%.+]] = load {{.+}} [[OMP_PF_IV]], - // CHECK-DAG: [[OMP_PF_UB_VAL_3:%.+]] = load {{.+}} [[OMP_PF_UB]], - // CHECK: [[PF_CMP_IV_UB:%.+]] = icmp sle {{.+}} [[OMP_PF_IV_VAL_1]], [[OMP_PF_UB_VAL_3]] - // CHECK: br {{.+}} [[PF_CMP_IV_UB]], label %[[PF_BODY:.+]], label %[[PF_END:.+]] - - // check that PrevLB and PrevUB are passed to the 'for' - // CHECK: [[PF_BODY]]: - // CHECK-DAG: {{.+}} = load{{.+}}, {{.+}}* [[OMP_PF_IV]], - // CHECK: br label {{.+}} - - // check stride 1 for 'for' in 'distribute parallel for' - // CHECK-DAG: [[OMP_PF_IV_VAL_2:%.+]] = load {{.+}}, {{.+}}* [[OMP_PF_IV]], - // CHECK: [[OMP_PF_IV_INC:%.+]] = add{{.+}} [[OMP_PF_IV_VAL_2]], 1 - // CHECK: store{{.+}} [[OMP_PF_IV_INC]], {{.+}}* [[OMP_PF_IV]], - // CHECK: br label %[[OMP_PF_JUMP_BACK]] - - // CHECK-DAG: call void @__kmpc_for_static_fini( - // CHECK: ret - } - - // schedule: static chunk - #pragma omp target - #pragma omp teams - // CHECK: define{{.+}} void [[OFFLOADING_FUN_5]]( - // CHECK: call {{.*}}void {{.+}} @__kmpc_fork_teams({{.+}}, i32 5, {{.+}}* [[OMP_OUTLINED_5:@.+]] to {{.+}}) - - #pragma omp distribute parallel for schedule(static, ch) - for (int i = 0; i < n; ++i) { - a[i] = b[i] + c[i]; - // CHECK: define{{.+}} void [[OMP_OUTLINED_5]]( - // CHECK: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, i32 92, - // CHECK: call{{.+}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}}[[OMP_PARFOR_OUTLINED_5:@.+]] to {{.+}}, - // skip rest of implementation of 'distribute' as it is tested above for default dist_schedule case - // CHECK: ret - - // 'parallel for' implementation using outer and inner loops and PrevEUB - // CHECK: define{{.+}} void [[OMP_PARFOR_OUTLINED_5]]({{.+}}, {{.+}}, i{{[0-9]+}} [[OMP_PREV_LB_IN:%.+]], i{{[0-9]+}} [[OMP_PREV_UB_IN:%.+]], {{.+}}, {{.+}}, {{.+}}, {{.+}}, {{.+}}) - // CHECK-DAG: [[OMP_PF_LB:%.omp.lb]] = alloca{{.+}}, - // CHECK-DAG: [[OMP_PF_UB:%.omp.ub]] = alloca{{.+}}, - // CHECK-DAG: [[OMP_PF_IV:%.omp.iv]] = alloca{{.+}}, - // CHECK-DAG: [[OMP_PF_ST:%.omp.stride]] = alloca{{.+}}, - - // initialize lb and ub to PrevLB and PrevUB - // CHECK-DAG: store{{.+}} [[OMP_PREV_LB_IN]], {{.+}}* [[PREV_LB_ADDR:%.+]], - // CHECK-DAG: store{{.+}} [[OMP_PREV_UB_IN]], {{.+}}* [[PREV_UB_ADDR:%.+]], - // CHECK-DAG: [[OMP_PREV_LB_VAL:%.+]] = load{{.+}}, {{.+}}* [[PREV_LB_ADDR]], - // CHECK-64-DAG: [[OMP_PREV_LB_TRC:%.+]] = trunc{{.+}} [[OMP_PREV_LB_VAL]] to {{.+}} - // CHECK-DAG: [[OMP_PREV_UB_VAL:%.+]] = load{{.+}}, {{.+}}* [[PREV_UB_ADDR]], - // CHECK-64-DAG: [[OMP_PREV_UB_TRC:%.+]] = trunc{{.+}} [[OMP_PREV_UB_VAL]] to {{.+}} - // CHECK-64-DAG: store{{.+}} [[OMP_PREV_LB_TRC]], {{.+}}* [[OMP_PF_LB]], - // CHECK-32-DAG: store{{.+}} [[OMP_PREV_LB_VAL]], {{.+}}* [[OMP_PF_LB]], - // CHECK-64-DAG: store{{.+}} [[OMP_PREV_UB_TRC]], {{.+}}* [[OMP_PF_UB]], - // CHECK-32-DAG: store{{.+}} [[OMP_PREV_UB_VAL]], {{.+}}* [[OMP_PF_UB]], - // CHECK: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, {{.+}} 33, {{.+}}, {{.+}}* [[OMP_PF_LB]], {{.+}}* [[OMP_PF_UB]],{{.+}}) - // CHECK: br label %[[OMP_PF_OUTER_LOOP_HEADER:.+]] - - // check PrevEUB (using PrevUB instead of NumIt as upper bound) - // CHECK: [[OMP_PF_OUTER_LOOP_HEADER]]: - // CHECK-DAG: [[OMP_PF_UB_VAL_1:%.+]] = load{{.+}} [[OMP_PF_UB]], - // CHECK-64-DAG: [[OMP_PF_UB_VAL_CONV:%.+]] = sext{{.+}} [[OMP_PF_UB_VAL_1]] to - // CHECK: [[PF_PREV_UB_VAL_1:%.+]] = load{{.+}}, {{.+}}* [[PREV_UB_ADDR]], - // CHECK-64-DAG: [[PF_CMP_UB_NUM_IT:%.+]] = icmp{{.+}} [[OMP_PF_UB_VAL_CONV]], [[PF_PREV_UB_VAL_1]] - // CHECK-32-DAG: [[PF_CMP_UB_NUM_IT:%.+]] = icmp{{.+}} [[OMP_PF_UB_VAL_1]], [[PF_PREV_UB_VAL_1]] - // CHECK: br i1 [[PF_CMP_UB_NUM_IT]], label %[[PF_EUB_TRUE:.+]], label %[[PF_EUB_FALSE:.+]] - // CHECK: [[PF_EUB_TRUE]]: - // CHECK: [[PF_PREV_UB_VAL_2:%.+]] = load{{.+}}, {{.+}}* [[PREV_UB_ADDR]], - // CHECK: br label %[[PF_EUB_END:.+]] - // CHECK-DAG: [[PF_EUB_FALSE]]: - // CHECK: [[OMP_PF_UB_VAL_2:%.+]] = load{{.+}} [[OMP_PF_UB]], - // CHECK-64: [[OMP_PF_UB_VAL_2_CONV:%.+]] = sext{{.+}} [[OMP_PF_UB_VAL_2]] to - // CHECK: br label %[[PF_EUB_END]] - // CHECK-DAG: [[PF_EUB_END]]: - // CHECK-64-DAG: [[PF_EUB_RES:%.+]] = phi{{.+}} [ [[PF_PREV_UB_VAL_2]], %[[PF_EUB_TRUE]] ], [ [[OMP_PF_UB_VAL_2_CONV]], %[[PF_EUB_FALSE]] ] - // CHECK-32-DAG: [[PF_EUB_RES:%.+]] = phi{{.+}} [ [[PF_PREV_UB_VAL_2]], %[[PF_EUB_TRUE]] ], [ [[OMP_PF_UB_VAL_2]], %[[PF_EUB_FALSE]] ] - // CHECK-64-DAG: [[PF_EUB_RES_CONV:%.+]] = trunc{{.+}} [[PF_EUB_RES]] to - // CHECK-64: store{{.+}} [[PF_EUB_RES_CONV]],{{.+}} [[OMP_PF_UB]], - // CHECK-32: store{{.+}} [[PF_EUB_RES]], {{.+}} [[OMP_PF_UB]], - - // initialize omp.iv (IV = LB) - // CHECK: [[OMP_PF_LB_VAL_1:%.+]] = load{{.+}}, {{.+}} [[OMP_PF_LB]], - // CHECK: store {{.+}} [[OMP_PF_LB_VAL_1]], {{.+}}* [[OMP_PF_IV]], - - // outer loop: while (IV < UB) { - // CHECK-DAG: [[OMP_PF_IV_VAL_1:%.+]] = load{{.+}}, {{.+}}* [[OMP_PF_IV]], - // CHECK-DAG: [[OMP_PF_UB_VAL_3:%.+]] = load{{.+}}, {{.+}}* [[OMP_PF_UB]], - // CHECK: [[PF_CMP_IV_UB_1:%.+]] = icmp{{.+}} [[OMP_PF_IV_VAL_1]], [[OMP_PF_UB_VAL_3]] - // CHECK: br{{.+}} [[PF_CMP_IV_UB_1]], label %[[OMP_PF_OUTER_LOOP_BODY:.+]], label %[[OMP_PF_OUTER_LOOP_END:.+]] - - // CHECK: [[OMP_PF_OUTER_LOOP_BODY]]: - // CHECK: br label %[[OMP_PF_INNER_FOR_HEADER:.+]] - - // CHECK: [[OMP_PF_INNER_FOR_HEADER]]: - // CHECK-DAG: [[OMP_PF_IV_VAL_2:%.+]] = load{{.+}}, {{.+}}* [[OMP_PF_IV]], - // CHECK-DAG: [[OMP_PF_UB_VAL_4:%.+]] = load{{.+}}, {{.+}}* [[OMP_PF_UB]], - // CHECK: [[PF_CMP_IV_UB_2:%.+]] = icmp{{.+}} [[OMP_PF_IV_VAL_2]], [[OMP_PF_UB_VAL_4]] - // CHECK: br{{.+}} [[PF_CMP_IV_UB_2]], label %[[OMP_PF_INNER_LOOP_BODY:.+]], label %[[OMP_PF_INNER_LOOP_END:.+]] - - // CHECK: [[OMP_PF_INNER_LOOP_BODY]]: - // CHECK-DAG: {{.+}} = load{{.+}}, {{.+}}* [[OMP_PF_IV]], - // skip body branch - // CHECK: br{{.+}} - // CHECK: br label %[[OMP_PF_INNER_LOOP_INC:.+]] - - // IV = IV + 1 and inner loop latch - // CHECK: [[OMP_PF_INNER_LOOP_INC]]: - // CHECK-DAG: [[OMP_PF_IV_VAL_3:%.+]] = load{{.+}}, {{.+}}* [[OMP_IV]], - // CHECK-DAG: [[OMP_PF_NEXT_IV:%.+]] = add{{.+}} [[OMP_PF_IV_VAL_3]], 1 - // CHECK-DAG: store{{.+}} [[OMP_PF_NEXT_IV]], {{.+}}* [[OMP_IV]], - // CHECK: br label %[[OMP_PF_INNER_FOR_HEADER]] - - // check NextLB and NextUB - // CHECK: [[OMP_PF_INNER_LOOP_END]]: - // CHECK: br label %[[OMP_PF_OUTER_LOOP_INC:.+]] - - // CHECK: [[OMP_PF_OUTER_LOOP_INC]]: - // CHECK-DAG: [[OMP_PF_LB_VAL_2:%.+]] = load{{.+}}, {{.+}} [[OMP_PF_LB]], - // CHECK-DAG: [[OMP_PF_ST_VAL_1:%.+]] = load{{.+}}, {{.+}} [[OMP_PF_ST]], - // CHECK-DAG: [[OMP_PF_LB_NEXT:%.+]] = add{{.+}} [[OMP_PF_LB_VAL_2]], [[OMP_PF_ST_VAL_1]] - // CHECK: store{{.+}} [[OMP_PF_LB_NEXT]], {{.+}}* [[OMP_PF_LB]], - // CHECK-DAG: [[OMP_PF_UB_VAL_5:%.+]] = load{{.+}}, {{.+}} [[OMP_PF_UB]], - // CHECK-DAG: [[OMP_PF_ST_VAL_2:%.+]] = load{{.+}}, {{.+}} [[OMP_PF_ST]], - // CHECK-DAG: [[OMP_PF_UB_NEXT:%.+]] = add{{.+}} [[OMP_PF_UB_VAL_5]], [[OMP_PF_ST_VAL_2]] - // CHECK: store{{.+}} [[OMP_PF_UB_NEXT]], {{.+}}* [[OMP_PF_UB]], - // CHECK: br label %[[OMP_PF_OUTER_LOOP_HEADER]] - - // CHECK: [[OMP_PF_OUTER_LOOP_END]]: - // CHECK-DAG: call void @__kmpc_for_static_fini( - // CHECK: ret - } - - // schedule: dynamic no chunk - #pragma omp target - #pragma omp teams - // CHECK: define{{.+}} void [[OFFLOADING_FUN_6]]( - // CHECK: call {{.*}}void {{.+}} @__kmpc_fork_teams({{.+}}, i32 4, {{.+}}* [[OMP_OUTLINED_6:@.+]] to {{.+}}) - - #pragma omp distribute parallel for schedule(dynamic) - for (int i = 0; i < n; ++i) { - a[i] = b[i] + c[i]; - // CHECK: define{{.+}} void [[OMP_OUTLINED_6]]( - // CHECK: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, i32 92, - // CHECK: call{{.+}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}}[[OMP_PARFOR_OUTLINED_6:@.+]] to {{.+}}, - // skip rest of implementation of 'distribute' as it is tested above for default dist_schedule case - // CHECK: ret - - // 'parallel for' implementation using outer and inner loops and PrevEUB - // CHECK: define{{.+}} void [[OMP_PARFOR_OUTLINED_6]]({{.+}}, {{.+}}, i{{[0-9]+}} [[OMP_PREV_LB_IN:%.+]], i{{[0-9]+}} [[OMP_PREV_UB_IN:%.+]], {{.+}}, {{.+}}, {{.+}}, {{.+}}) - // CHECK-DAG: [[OMP_PF_LB:%.omp.lb]] = alloca{{.+}}, - // CHECK-DAG: [[OMP_PF_UB:%.omp.ub]] = alloca{{.+}}, - // CHECK-DAG: [[OMP_PF_IV:%.omp.iv]] = alloca{{.+}}, - // CHECK-DAG: [[OMP_PF_ST:%.omp.stride]] = alloca{{.+}}, - - // initialize lb and ub to PrevLB and PrevUB - // CHECK-DAG: store{{.+}} [[OMP_PREV_LB_IN]], {{.+}}* [[PREV_LB_ADDR:%.+]], - // CHECK-DAG: store{{.+}} [[OMP_PREV_UB_IN]], {{.+}}* [[PREV_UB_ADDR:%.+]], - // CHECK-DAG: [[OMP_PREV_LB_VAL:%.+]] = load{{.+}}, {{.+}}* [[PREV_LB_ADDR]], - // CHECK-64-DAG: [[OMP_PREV_LB_TRC:%.+]] = trunc{{.+}} [[OMP_PREV_LB_VAL]] to {{.+}} - // CHECK-DAG: [[OMP_PREV_UB_VAL:%.+]] = load{{.+}}, {{.+}}* [[PREV_UB_ADDR]], - // CHECK-64-DAG: [[OMP_PREV_UB_TRC:%.+]] = trunc{{.+}} [[OMP_PREV_UB_VAL]] to {{.+}} - // CHECK-64-DAG: store{{.+}} [[OMP_PREV_LB_TRC]], {{.+}}* [[OMP_PF_LB]], - // CHECK-64-DAG: store{{.+}} [[OMP_PREV_UB_TRC]], {{.+}}* [[OMP_PF_UB]], - // CHECK-32-DAG: store{{.+}} [[OMP_PREV_LB_VAL]], {{.+}}* [[OMP_PF_LB]], - // CHECK-32-DAG: store{{.+}} [[OMP_PREV_UB_VAL]], {{.+}}* [[OMP_PF_UB]], - // CHECK-DAG: [[OMP_PF_LB_VAL:%.+]] = load{{.+}}, {{.+}} [[OMP_PF_LB]], - // CHECK-DAG: [[OMP_PF_UB_VAL:%.+]] = load{{.+}}, {{.+}} [[OMP_PF_UB]], - // CHECK: call void @__kmpc_dispatch_init_4({{.+}}, {{.+}}, {{.+}} 35, {{.+}} [[OMP_PF_LB_VAL]], {{.+}} [[OMP_PF_UB_VAL]], {{.+}}, {{.+}}) - // CHECK: br label %[[OMP_PF_OUTER_LOOP_HEADER:.+]] - - // CHECK: [[OMP_PF_OUTER_LOOP_HEADER]]: - // CHECK: [[IS_FIN:%.+]] = call{{.+}} @__kmpc_dispatch_next_4({{.+}}, {{.+}}, {{.+}}, {{.+}}* [[OMP_PF_LB]], {{.+}}* [[OMP_PF_UB]], {{.+}}* [[OMP_PF_ST]]) - // CHECK: [[IS_FIN_CMP:%.+]] = icmp{{.+}} [[IS_FIN]], 0 - // CHECK: br{{.+}} [[IS_FIN_CMP]], label %[[OMP_PF_OUTER_LOOP_BODY:.+]], label %[[OMP_PF_OUTER_LOOP_END:.+]] - - // initialize omp.iv (IV = LB) - // CHECK: [[OMP_PF_OUTER_LOOP_BODY]]: - // CHECK-DAG: [[OMP_PF_LB_VAL_1:%.+]] = load{{.+}}, {{.+}} [[OMP_PF_LB]], - // CHECK-DAG: store {{.+}} [[OMP_PF_LB_VAL_1]], {{.+}}* [[OMP_PF_IV]], - // CHECK: br label %[[OMP_PF_INNER_LOOP_HEADER:.+]] - - // CHECK: [[OMP_PF_INNER_LOOP_HEADER]]: - // CHECK-DAG: [[OMP_PF_IV_VAL_2:%.+]] = load{{.+}}, {{.+}}* [[OMP_PF_IV]], - // CHECK-DAG: [[OMP_PF_UB_VAL_4:%.+]] = load{{.+}}, {{.+}}* [[OMP_PF_UB]], - // CHECK: [[PF_CMP_IV_UB_2:%.+]] = icmp{{.+}} [[OMP_PF_IV_VAL_2]], [[OMP_PF_UB_VAL_4]] - // CHECK: br{{.+}} [[PF_CMP_IV_UB_2]], label %[[OMP_PF_INNER_LOOP_BODY:.+]], label %[[OMP_PF_INNER_LOOP_END:.+]] - - // CHECK: [[OMP_PF_INNER_LOOP_BODY]]: - // CHECK-DAG: {{.+}} = load{{.+}}, {{.+}}* [[OMP_PF_IV]], - // skip body branch - // CHECK: br{{.+}} - // CHECK: br label %[[OMP_PF_INNER_LOOP_INC:.+]] - - // IV = IV + 1 and inner loop latch - // CHECK: [[OMP_PF_INNER_LOOP_INC]]: - // CHECK-DAG: [[OMP_PF_IV_VAL_3:%.+]] = load{{.+}}, {{.+}}* [[OMP_IV]], - // CHECK-DAG: [[OMP_PF_NEXT_IV:%.+]] = add{{.+}} [[OMP_PF_IV_VAL_3]], 1 - // CHECK-DAG: store{{.+}} [[OMP_PF_NEXT_IV]], {{.+}}* [[OMP_IV]], - // CHECK: br label %[[OMP_PF_INNER_FOR_HEADER]] - - // check NextLB and NextUB - // CHECK: [[OMP_PF_INNER_LOOP_END]]: - // CHECK: br label %[[OMP_PF_OUTER_LOOP_INC:.+]] - - // CHECK: [[OMP_PF_OUTER_LOOP_INC]]: - // CHECK: br label %[[OMP_PF_OUTER_LOOP_HEADER]] - - // CHECK: [[OMP_PF_OUTER_LOOP_END]]: - // CHECK: ret - } - - // schedule: dynamic chunk - #pragma omp target - #pragma omp teams - // CHECK: define{{.+}} void [[OFFLOADING_FUN_7]]( - // CHECK: call {{.*}}void {{.+}} @__kmpc_fork_teams({{.+}}, i32 5, {{.+}}* [[OMP_OUTLINED_7:@.+]] to {{.+}}) - - #pragma omp distribute parallel for schedule(dynamic, ch) - for (int i = 0; i < n; ++i) { - a[i] = b[i] + c[i]; - // CHECK: define{{.+}} void [[OMP_OUTLINED_7]]( - // CHECK: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, i32 92, - // CHECK: call{{.+}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}}[[OMP_PARFOR_OUTLINED_7:@.+]] to {{.+}}, - // skip rest of implementation of 'distribute' as it is tested above for default dist_schedule case - // CHECK: ret - - // 'parallel for' implementation using outer and inner loops and PrevEUB - // CHECK: define{{.+}} void [[OMP_PARFOR_OUTLINED_7]]({{.+}}, {{.+}}, i{{[0-9]+}} [[OMP_PREV_LB_IN:%.+]], i{{[0-9]+}} [[OMP_PREV_UB_IN:%.+]], {{.+}}, {{.+}}, {{.+}}, {{.+}}, {{.+}}) - // CHECK-DAG: [[OMP_PF_LB:%.omp.lb]] = alloca{{.+}}, - // CHECK-DAG: [[OMP_PF_UB:%.omp.ub]] = alloca{{.+}}, - // CHECK-DAG: [[OMP_PF_IV:%.omp.iv]] = alloca{{.+}}, - // CHECK-DAG: [[OMP_PF_ST:%.omp.stride]] = alloca{{.+}}, - - // initialize lb and ub to PrevLB and PrevUB - // CHECK-DAG: store{{.+}} [[OMP_PREV_LB_IN]], {{.+}}* [[PREV_LB_ADDR:%.+]], - // CHECK-DAG: store{{.+}} [[OMP_PREV_UB_IN]], {{.+}}* [[PREV_UB_ADDR:%.+]], - // CHECK-DAG: [[OMP_PREV_LB_VAL:%.+]] = load{{.+}}, {{.+}}* [[PREV_LB_ADDR]], - // CHECK-64-DAG: [[OMP_PREV_LB_TRC:%.+]] = trunc{{.+}} [[OMP_PREV_LB_VAL]] to {{.+}} - // CHECK-DAG: [[OMP_PREV_UB_VAL:%.+]] = load{{.+}}, {{.+}}* [[PREV_UB_ADDR]], - // CHECK-64-DAG: [[OMP_PREV_UB_TRC:%.+]] = trunc{{.+}} [[OMP_PREV_UB_VAL]] to {{.+}} - // CHECK-64-DAG: store{{.+}} [[OMP_PREV_LB_TRC]], {{.+}}* [[OMP_PF_LB]], - // CHECK-64-DAG: store{{.+}} [[OMP_PREV_UB_TRC]], {{.+}}* [[OMP_PF_UB]], - // CHECK-32-DAG: store{{.+}} [[OMP_PREV_LB_VAL]], {{.+}}* [[OMP_PF_LB]], - // CHECK-32-DAG: store{{.+}} [[OMP_PREV_UB_VAL]], {{.+}}* [[OMP_PF_UB]], - // CHECK-DAG: [[OMP_PF_LB_VAL:%.+]] = load{{.+}}, {{.+}} [[OMP_PF_LB]], - // CHECK-DAG: [[OMP_PF_UB_VAL:%.+]] = load{{.+}}, {{.+}} [[OMP_PF_UB]], - // CHECK: call void @__kmpc_dispatch_init_4({{.+}}, {{.+}}, {{.+}} 35, {{.+}} [[OMP_PF_LB_VAL]], {{.+}} [[OMP_PF_UB_VAL]], {{.+}}, {{.+}}) - // CHECK: br label %[[OMP_PF_OUTER_LOOP_HEADER:.+]] - - // CHECK: [[OMP_PF_OUTER_LOOP_HEADER]]: - // CHECK: [[IS_FIN:%.+]] = call{{.+}} @__kmpc_dispatch_next_4({{.+}}, {{.+}}, {{.+}}, {{.+}}* [[OMP_PF_LB]], {{.+}}* [[OMP_PF_UB]], {{.+}}* [[OMP_PF_ST]]) - // CHECK: [[IS_FIN_CMP:%.+]] = icmp{{.+}} [[IS_FIN]], 0 - // CHECK: br{{.+}} [[IS_FIN_CMP]], label %[[OMP_PF_OUTER_LOOP_BODY:.+]], label %[[OMP_PF_OUTER_LOOP_END:.+]] - - // initialize omp.iv (IV = LB) - // CHECK: [[OMP_PF_OUTER_LOOP_BODY]]: - // CHECK-DAG: [[OMP_PF_LB_VAL_1:%.+]] = load{{.+}}, {{.+}} [[OMP_PF_LB]], - // CHECK-DAG: store {{.+}} [[OMP_PF_LB_VAL_1]], {{.+}}* [[OMP_PF_IV]], - // CHECK: br label %[[OMP_PF_INNER_LOOP_HEADER:.+]] - - // CHECK: [[OMP_PF_INNER_LOOP_HEADER]]: - // CHECK-DAG: [[OMP_PF_IV_VAL_2:%.+]] = load{{.+}}, {{.+}}* [[OMP_PF_IV]], - // CHECK-DAG: [[OMP_PF_UB_VAL_4:%.+]] = load{{.+}}, {{.+}}* [[OMP_PF_UB]], - // CHECK: [[PF_CMP_IV_UB_2:%.+]] = icmp{{.+}} [[OMP_PF_IV_VAL_2]], [[OMP_PF_UB_VAL_4]] - // CHECK: br{{.+}} [[PF_CMP_IV_UB_2]], label %[[OMP_PF_INNER_LOOP_BODY:.+]], label %[[OMP_PF_INNER_LOOP_END:.+]] - - // CHECK: [[OMP_PF_INNER_LOOP_BODY]]: - // CHECK-DAG: {{.+}} = load{{.+}}, {{.+}}* [[OMP_PF_IV]], - // skip body branch - // CHECK: br{{.+}} - // CHECK: br label %[[OMP_PF_INNER_LOOP_INC:.+]] - - // IV = IV + 1 and inner loop latch - // CHECK: [[OMP_PF_INNER_LOOP_INC]]: - // CHECK-DAG: [[OMP_PF_IV_VAL_3:%.+]] = load{{.+}}, {{.+}}* [[OMP_IV]], - // CHECK-DAG: [[OMP_PF_NEXT_IV:%.+]] = add{{.+}} [[OMP_PF_IV_VAL_3]], 1 - // CHECK-DAG: store{{.+}} [[OMP_PF_NEXT_IV]], {{.+}}* [[OMP_IV]], - // CHECK: br label %[[OMP_PF_INNER_FOR_HEADER]] - - // check NextLB and NextUB - // CHECK: [[OMP_PF_INNER_LOOP_END]]: - // CHECK: br label %[[OMP_PF_OUTER_LOOP_INC:.+]] - - // CHECK: [[OMP_PF_OUTER_LOOP_INC]]: - // CHECK: br label %[[OMP_PF_OUTER_LOOP_HEADER]] - - // CHECK: [[OMP_PF_OUTER_LOOP_END]]: - // CHECK: ret - } - - return tmain(); -#endif -} - -// check code -// CHECK: define{{.+}} [[TMAIN]]() - -// CHECK: call i{{[0-9]+}} @__tgt_target_teams( -// CHECK: call void [[OFFLOADING_FUN_1:@.+]]( - -// CHECK: call i{{[0-9]+}} @__tgt_target_teams( -// CHECK: call void [[OFFLOADING_FUN_2:@.+]]( - -// CHECK: call i{{[0-9]+}} @__tgt_target_teams( -// CHECK: call void [[OFFLOADING_FUN_3:@.+]]( - -// CHECK: call i{{[0-9]+}} @__tgt_target_teams( -// CHECK: call void [[OFFLOADING_FUN_4:@.+]]( - -// CHECK: call i{{[0-9]+}} @__tgt_target_teams( -// CHECK: call void [[OFFLOADING_FUN_5:@.+]]( - -// CHECK: call i{{[0-9]+}} @__tgt_target_teams( -// CHECK: call void [[OFFLOADING_FUN_6:@.+]]( - -// CHECK: call i{{[0-9]+}} @__tgt_target_teams( -// CHECK: call void [[OFFLOADING_FUN_7:@.+]]( - -// CHECK: define{{.+}} void [[OFFLOADING_FUN_1]]( -// CHECK: call {{.*}}void {{.+}} @__kmpc_fork_teams({{.+}}, i32 4, {{.+}}* [[OMP_OUTLINED_1:@.+]] to {{.+}}) - -// CHECK: define{{.+}} void [[OMP_OUTLINED_1]]( -// CHECK-DAG: [[OMP_IV:%.omp.iv]] = alloca -// CHECK-DAG: [[OMP_LB:%.omp.comb.lb]] = alloca -// CHECK-DAG: [[OMP_UB:%.omp.comb.ub]] = alloca -// CHECK-DAG: [[OMP_ST:%.omp.stride]] = alloca - -// CHECK: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, i32 92, - -// check EUB for distribute -// CHECK-DAG: [[OMP_UB_VAL_1:%.+]] = load{{.+}} [[OMP_UB]], -// CHECK: [[NUM_IT_1:%.+]] = load{{.+}}, -// CHECK-DAG: [[CMP_UB_NUM_IT:%.+]] = icmp sgt {{.+}} [[OMP_UB_VAL_1]], [[NUM_IT_1]] -// CHECK: br {{.+}} [[CMP_UB_NUM_IT]], label %[[EUB_TRUE:.+]], label %[[EUB_FALSE:.+]] -// CHECK-DAG: [[EUB_TRUE]]: -// CHECK: [[NUM_IT_2:%.+]] = load{{.+}}, -// CHECK: br label %[[EUB_END:.+]] -// CHECK-DAG: [[EUB_FALSE]]: -// CHECK: [[OMP_UB_VAL2:%.+]] = load{{.+}} [[OMP_UB]], -// CHECK: br label %[[EUB_END]] -// CHECK-DAG: [[EUB_END]]: -// CHECK-DAG: [[EUB_RES:%.+]] = phi{{.+}} [ [[NUM_IT_2]], %[[EUB_TRUE]] ], [ [[OMP_UB_VAL2]], %[[EUB_FALSE]] ] -// CHECK: store{{.+}} [[EUB_RES]], {{.+}}* [[OMP_UB]], - -// initialize omp.iv -// CHECK: [[OMP_LB_VAL_1:%.+]] = load{{.+}}, {{.+}}* [[OMP_LB]], -// CHECK: store {{.+}} [[OMP_LB_VAL_1]], {{.+}}* [[OMP_IV]], -// CHECK: br label %[[OMP_JUMP_BACK:.+]] - -// check exit condition -// CHECK: [[OMP_JUMP_BACK]]: -// CHECK-DAG: [[OMP_IV_VAL_1:%.+]] = load {{.+}} [[OMP_IV]], -// CHECK-DAG: [[OMP_UB_VAL_3:%.+]] = load {{.+}} [[OMP_UB]], -// CHECK: [[CMP_IV_UB:%.+]] = icmp sle {{.+}} [[OMP_IV_VAL_1]], [[OMP_UB_VAL_3]] -// CHECK: br {{.+}} [[CMP_IV_UB]], label %[[DIST_BODY:.+]], label %[[DIST_END:.+]] - -// check that PrevLB and PrevUB are passed to the 'for' -// CHECK: [[DIST_BODY]]: -// CHECK-DAG: [[OMP_PREV_LB:%.+]] = load {{.+}}, {{.+}} [[OMP_LB]], -// CHECK-64-DAG: [[OMP_PREV_LB_EXT:%.+]] = zext {{.+}} [[OMP_PREV_LB]] to {{.+}} -// CHECK-DAG: [[OMP_PREV_UB:%.+]] = load {{.+}}, {{.+}} [[OMP_UB]], -// CHECK-64-DAG: [[OMP_PREV_UB_EXT:%.+]] = zext {{.+}} [[OMP_PREV_UB]] to {{.+}} -// check that distlb and distub are properly passed to fork_call -// CHECK-64: call{{.+}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}}[[OMP_PARFOR_OUTLINED_1:@.+]] to {{.+}}, i{{[0-9]+}} [[OMP_PREV_LB_EXT]], i{{[0-9]+}} [[OMP_PREV_UB_EXT]], {{.+}}) -// CHECK-32: call{{.+}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}}[[OMP_PARFOR_OUTLINED_1:@.+]] to {{.+}}, i{{[0-9]+}} [[OMP_PREV_LB]], i{{[0-9]+}} [[OMP_PREV_UB]], {{.+}}) -// CHECK: br label %[[DIST_INC:.+]] - -// increment by stride (distInc - 'parallel for' executes the whole chunk) and latch -// CHECK: [[DIST_INC]]: -// CHECK-DAG: [[OMP_IV_VAL_2:%.+]] = load {{.+}}, {{.+}}* [[OMP_IV]], -// CHECK-DAG: [[OMP_ST_VAL_1:%.+]] = load {{.+}}, {{.+}}* [[OMP_ST]], -// CHECK: [[OMP_IV_INC:%.+]] = add{{.+}} [[OMP_IV_VAL_2]], [[OMP_ST_VAL_1]] -// CHECK: store{{.+}} [[OMP_IV_INC]], {{.+}}* [[OMP_IV]], -// CHECK: br label %[[OMP_JUMP_BACK]] - -// CHECK-DAG: call void @__kmpc_for_static_fini( -// CHECK: ret - -// implementation of 'parallel for' -// CHECK: define{{.+}} void [[OMP_PARFOR_OUTLINED_1]]({{.+}}, {{.+}}, i{{[0-9]+}} [[OMP_PREV_LB_IN:%.+]], i{{[0-9]+}} [[OMP_PREV_UB_IN:%.+]], {{.+}}, {{.+}}, {{.+}}, {{.+}}) - -// CHECK-DAG: [[OMP_PF_LB:%.omp.lb]] = alloca{{.+}}, -// CHECK-DAG: [[OMP_PF_UB:%.omp.ub]] = alloca{{.+}}, -// CHECK-DAG: [[OMP_PF_IV:%.omp.iv]] = alloca{{.+}}, - -// initialize lb and ub to PrevLB and PrevUB -// CHECK-DAG: store{{.+}} [[OMP_PREV_LB_IN]], {{.+}}* [[PREV_LB_ADDR:%.+]], -// CHECK-DAG: store{{.+}} [[OMP_PREV_UB_IN]], {{.+}}* [[PREV_UB_ADDR:%.+]], -// CHECK-DAG: [[OMP_PREV_LB_VAL:%.+]] = load{{.+}}, {{.+}}* [[PREV_LB_ADDR]], -// CHECK-64-DAG: [[OMP_PREV_LB_TRC:%.+]] = trunc{{.+}} [[OMP_PREV_LB_VAL]] to {{.+}} -// CHECK-DAG: [[OMP_PREV_UB_VAL:%.+]] = load{{.+}}, {{.+}}* [[PREV_UB_ADDR]], -// CHECK-64-DAG: [[OMP_PREV_UB_TRC:%.+]] = trunc{{.+}} [[OMP_PREV_UB_VAL]] to {{.+}} -// CHECK-64-DAG: store{{.+}} [[OMP_PREV_LB_TRC]], {{.+}}* [[OMP_PF_LB]], -// CHECK-32-DAG: store{{.+}} [[OMP_PREV_LB_VAL]], {{.+}}* [[OMP_PF_LB]], -// CHECK-64-DAG: store{{.+}} [[OMP_PREV_UB_TRC]], {{.+}}* [[OMP_PF_UB]], -// CHECK-32-DAG: store{{.+}} [[OMP_PREV_UB_VAL]], {{.+}}* [[OMP_PF_UB]], -// CHECK: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, {{.+}} 34, {{.+}}, {{.+}}* [[OMP_PF_LB]], {{.+}}* [[OMP_PF_UB]],{{.+}}) - -// PrevEUB is only used when 'for' has a chunked schedule, otherwise EUB is used -// In this case we use EUB -// CHECK-DAG: [[OMP_PF_UB_VAL_1:%.+]] = load{{.+}} [[OMP_PF_UB]], -// CHECK: [[PF_NUM_IT_1:%.+]] = load{{.+}}, -// CHECK-DAG: [[PF_CMP_UB_NUM_IT:%.+]] = icmp{{.+}} [[OMP_PF_UB_VAL_1]], [[PF_NUM_IT_1]] -// CHECK: br i1 [[PF_CMP_UB_NUM_IT]], label %[[PF_EUB_TRUE:.+]], label %[[PF_EUB_FALSE:.+]] -// CHECK: [[PF_EUB_TRUE]]: -// CHECK: [[PF_NUM_IT_2:%.+]] = load{{.+}}, -// CHECK: br label %[[PF_EUB_END:.+]] -// CHECK-DAG: [[PF_EUB_FALSE]]: -// CHECK: [[OMP_PF_UB_VAL2:%.+]] = load{{.+}} [[OMP_PF_UB]], -// CHECK: br label %[[PF_EUB_END]] -// CHECK-DAG: [[PF_EUB_END]]: -// CHECK-DAG: [[PF_EUB_RES:%.+]] = phi{{.+}} [ [[PF_NUM_IT_2]], %[[PF_EUB_TRUE]] ], [ [[OMP_PF_UB_VAL2]], %[[PF_EUB_FALSE]] ] -// CHECK: store{{.+}} [[PF_EUB_RES]],{{.+}} [[OMP_PF_UB]], - -// initialize omp.iv -// CHECK: [[OMP_PF_LB_VAL_1:%.+]] = load{{.+}}, {{.+}} [[OMP_PF_LB]], -// CHECK: store {{.+}} [[OMP_PF_LB_VAL_1]], {{.+}}* [[OMP_PF_IV]], -// CHECK: br label %[[OMP_PF_JUMP_BACK:.+]] - -// check exit condition -// CHECK: [[OMP_PF_JUMP_BACK]]: -// CHECK-DAG: [[OMP_PF_IV_VAL_1:%.+]] = load {{.+}} [[OMP_PF_IV]], -// CHECK-DAG: [[OMP_PF_UB_VAL_3:%.+]] = load {{.+}} [[OMP_PF_UB]], -// CHECK: [[PF_CMP_IV_UB:%.+]] = icmp sle {{.+}} [[OMP_PF_IV_VAL_1]], [[OMP_PF_UB_VAL_3]] -// CHECK: br {{.+}} [[PF_CMP_IV_UB]], label %[[PF_BODY:.+]], label %[[PF_END:.+]] - -// check that PrevLB and PrevUB are passed to the 'for' -// CHECK: [[PF_BODY]]: -// CHECK-DAG: {{.+}} = load{{.+}}, {{.+}}* [[OMP_PF_IV]], -// CHECK: br label {{.+}} - -// check stride 1 for 'for' in 'distribute parallel for' -// CHECK-DAG: [[OMP_PF_IV_VAL_2:%.+]] = load {{.+}}, {{.+}}* [[OMP_PF_IV]], -// CHECK: [[OMP_PF_IV_INC:%.+]] = add{{.+}} [[OMP_PF_IV_VAL_2]], 1 -// CHECK: store{{.+}} [[OMP_PF_IV_INC]], {{.+}}* [[OMP_PF_IV]], -// CHECK: br label %[[OMP_PF_JUMP_BACK]] - -// CHECK-DAG: call void @__kmpc_for_static_fini( -// CHECK: ret - -// CHECK: define{{.+}} void [[OFFLOADING_FUN_2]]( -// CHECK: call {{.*}}void {{.+}} @__kmpc_fork_teams({{.+}}, i32 4, {{.+}}* [[OMP_OUTLINED_2:@.+]] to {{.+}}) - -// CHECK: define{{.+}} void [[OMP_OUTLINED_2]]( -// CHECK-DAG: [[OMP_IV:%.omp.iv]] = alloca -// CHECK-DAG: [[OMP_LB:%.omp.comb.lb]] = alloca -// CHECK-DAG: [[OMP_UB:%.omp.comb.ub]] = alloca -// CHECK-DAG: [[OMP_ST:%.omp.stride]] = alloca - -// CHECK: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, i32 92, - -// check EUB for distribute -// CHECK-DAG: [[OMP_UB_VAL_1:%.+]] = load{{.+}} [[OMP_UB]], -// CHECK: [[NUM_IT_1:%.+]] = load{{.+}}, -// CHECK-DAG: [[CMP_UB_NUM_IT:%.+]] = icmp sgt {{.+}} [[OMP_UB_VAL_1]], [[NUM_IT_1]] -// CHECK: br {{.+}} [[CMP_UB_NUM_IT]], label %[[EUB_TRUE:.+]], label %[[EUB_FALSE:.+]] -// CHECK-DAG: [[EUB_TRUE]]: -// CHECK: [[NUM_IT_2:%.+]] = load{{.+}}, -// CHECK: br label %[[EUB_END:.+]] -// CHECK-DAG: [[EUB_FALSE]]: -// CHECK: [[OMP_UB_VAL2:%.+]] = load{{.+}} [[OMP_UB]], -// CHECK: br label %[[EUB_END]] -// CHECK-DAG: [[EUB_END]]: -// CHECK-DAG: [[EUB_RES:%.+]] = phi{{.+}} [ [[NUM_IT_2]], %[[EUB_TRUE]] ], [ [[OMP_UB_VAL2]], %[[EUB_FALSE]] ] -// CHECK: store{{.+}} [[EUB_RES]], {{.+}}* [[OMP_UB]], - -// initialize omp.iv -// CHECK: [[OMP_LB_VAL_1:%.+]] = load{{.+}}, {{.+}}* [[OMP_LB]], -// CHECK: store {{.+}} [[OMP_LB_VAL_1]], {{.+}}* [[OMP_IV]], -// CHECK: br label %[[OMP_JUMP_BACK:.+]] - -// check exit condition -// CHECK: [[OMP_JUMP_BACK]]: -// CHECK-DAG: [[OMP_IV_VAL_1:%.+]] = load {{.+}} [[OMP_IV]], -// CHECK-DAG: [[OMP_UB_VAL_3:%.+]] = load {{.+}} [[OMP_UB]], -// CHECK: [[CMP_IV_UB:%.+]] = icmp sle {{.+}} [[OMP_IV_VAL_1]], [[OMP_UB_VAL_3]] -// CHECK: br {{.+}} [[CMP_IV_UB]], label %[[DIST_BODY:.+]], label %[[DIST_END:.+]] - -// check that PrevLB and PrevUB are passed to the 'for' -// CHECK: [[DIST_BODY]]: -// CHECK-DAG: [[OMP_PREV_LB:%.+]] = load {{.+}}, {{.+}} [[OMP_LB]], -// CHECK-64-DAG: [[OMP_PREV_LB_EXT:%.+]] = zext {{.+}} [[OMP_PREV_LB]] to {{.+}} -// CHECK-DAG: [[OMP_PREV_UB:%.+]] = load {{.+}}, {{.+}} [[OMP_UB]], -// CHECK-64-DAG: [[OMP_PREV_UB_EXT:%.+]] = zext {{.+}} [[OMP_PREV_UB]] to {{.+}} -// check that distlb and distub are properly passed to fork_call -// CHECK-64: call{{.+}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}}[[OMP_PARFOR_OUTLINED_2:@.+]] to {{.+}}, i{{[0-9]+}} [[OMP_PREV_LB_EXT]], i{{[0-9]+}} [[OMP_PREV_UB_EXT]], {{.+}}) -// CHECK-32: call{{.+}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}}[[OMP_PARFOR_OUTLINED_2:@.+]] to {{.+}}, i{{[0-9]+}} [[OMP_PREV_LB]], i{{[0-9]+}} [[OMP_PREV_UB]], {{.+}}) -// CHECK: br label %[[DIST_INC:.+]] - -// increment by stride (distInc - 'parallel for' executes the whole chunk) and latch -// CHECK: [[DIST_INC]]: -// CHECK-DAG: [[OMP_IV_VAL_2:%.+]] = load {{.+}}, {{.+}}* [[OMP_IV]], -// CHECK-DAG: [[OMP_ST_VAL_1:%.+]] = load {{.+}}, {{.+}}* [[OMP_ST]], -// CHECK: [[OMP_IV_INC:%.+]] = add{{.+}} [[OMP_IV_VAL_2]], [[OMP_ST_VAL_1]] -// CHECK: store{{.+}} [[OMP_IV_INC]], {{.+}}* [[OMP_IV]], -// CHECK: br label %[[OMP_JUMP_BACK]] - -// CHECK-DAG: call void @__kmpc_for_static_fini( -// CHECK: ret - -// implementation of 'parallel for' -// CHECK: define{{.+}} void [[OMP_PARFOR_OUTLINED_2]]({{.+}}, {{.+}}, i{{[0-9]+}} [[OMP_PREV_LB_IN:%.+]], i{{[0-9]+}} [[OMP_PREV_UB_IN:%.+]], {{.+}}, {{.+}}, {{.+}}, {{.+}}) - -// CHECK-DAG: [[OMP_PF_LB:%.omp.lb]] = alloca{{.+}}, -// CHECK-DAG: [[OMP_PF_UB:%.omp.ub]] = alloca{{.+}}, -// CHECK-DAG: [[OMP_PF_IV:%.omp.iv]] = alloca{{.+}}, - -// initialize lb and ub to PrevLB and PrevUB -// CHECK-DAG: store{{.+}} [[OMP_PREV_LB_IN]], {{.+}}* [[PREV_LB_ADDR:%.+]], -// CHECK-DAG: store{{.+}} [[OMP_PREV_UB_IN]], {{.+}}* [[PREV_UB_ADDR:%.+]], -// CHECK-DAG: [[OMP_PREV_LB_VAL:%.+]] = load{{.+}}, {{.+}}* [[PREV_LB_ADDR]], -// CHECK-64-DAG: [[OMP_PREV_LB_TRC:%.+]] = trunc{{.+}} [[OMP_PREV_LB_VAL]] to {{.+}} -// CHECK-DAG: [[OMP_PREV_UB_VAL:%.+]] = load{{.+}}, {{.+}}* [[PREV_UB_ADDR]], -// CHECK-64-DAG: [[OMP_PREV_UB_TRC:%.+]] = trunc{{.+}} [[OMP_PREV_UB_VAL]] to {{.+}} -// CHECK-64-DAG: store{{.+}} [[OMP_PREV_LB_TRC]], {{.+}}* [[OMP_PF_LB]], -// CHECK-32-DAG: store{{.+}} [[OMP_PREV_LB_VAL]], {{.+}}* [[OMP_PF_LB]], -// CHECK-64-DAG: store{{.+}} [[OMP_PREV_UB_TRC]], {{.+}}* [[OMP_PF_UB]], -// CHECK-32-DAG: store{{.+}} [[OMP_PREV_UB_VAL]], {{.+}}* [[OMP_PF_UB]], -// CHECK: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, {{.+}} 34, {{.+}}, {{.+}}* [[OMP_PF_LB]], {{.+}}* [[OMP_PF_UB]],{{.+}}) - -// PrevEUB is only used when 'for' has a chunked schedule, otherwise EUB is used -// In this case we use EUB -// CHECK-DAG: [[OMP_PF_UB_VAL_1:%.+]] = load{{.+}} [[OMP_PF_UB]], -// CHECK: [[PF_NUM_IT_1:%.+]] = load{{.+}}, -// CHECK-DAG: [[PF_CMP_UB_NUM_IT:%.+]] = icmp{{.+}} [[OMP_PF_UB_VAL_1]], [[PF_NUM_IT_1]] -// CHECK: br i1 [[PF_CMP_UB_NUM_IT]], label %[[PF_EUB_TRUE:.+]], label %[[PF_EUB_FALSE:.+]] -// CHECK: [[PF_EUB_TRUE]]: -// CHECK: [[PF_NUM_IT_2:%.+]] = load{{.+}}, -// CHECK: br label %[[PF_EUB_END:.+]] -// CHECK-DAG: [[PF_EUB_FALSE]]: -// CHECK: [[OMP_PF_UB_VAL2:%.+]] = load{{.+}} [[OMP_PF_UB]], -// CHECK: br label %[[PF_EUB_END]] -// CHECK-DAG: [[PF_EUB_END]]: -// CHECK-DAG: [[PF_EUB_RES:%.+]] = phi{{.+}} [ [[PF_NUM_IT_2]], %[[PF_EUB_TRUE]] ], [ [[OMP_PF_UB_VAL2]], %[[PF_EUB_FALSE]] ] -// CHECK: store{{.+}} [[PF_EUB_RES]],{{.+}} [[OMP_PF_UB]], - -// initialize omp.iv -// CHECK: [[OMP_PF_LB_VAL_1:%.+]] = load{{.+}}, {{.+}} [[OMP_PF_LB]], -// CHECK: store {{.+}} [[OMP_PF_LB_VAL_1]], {{.+}}* [[OMP_PF_IV]], -// CHECK: br label %[[OMP_PF_JUMP_BACK:.+]] - -// check exit condition -// CHECK: [[OMP_PF_JUMP_BACK]]: -// CHECK-DAG: [[OMP_PF_IV_VAL_1:%.+]] = load {{.+}} [[OMP_PF_IV]], -// CHECK-DAG: [[OMP_PF_UB_VAL_3:%.+]] = load {{.+}} [[OMP_PF_UB]], -// CHECK: [[PF_CMP_IV_UB:%.+]] = icmp sle {{.+}} [[OMP_PF_IV_VAL_1]], [[OMP_PF_UB_VAL_3]] -// CHECK: br {{.+}} [[PF_CMP_IV_UB]], label %[[PF_BODY:.+]], label %[[PF_END:.+]] - -// check that PrevLB and PrevUB are passed to the 'for' -// CHECK: [[PF_BODY]]: -// CHECK-DAG: {{.+}} = load{{.+}}, {{.+}}* [[OMP_PF_IV]], -// CHECK: br label {{.+}} - -// check stride 1 for 'for' in 'distribute parallel for' -// CHECK-DAG: [[OMP_PF_IV_VAL_2:%.+]] = load {{.+}}, {{.+}}* [[OMP_PF_IV]], -// CHECK: [[OMP_PF_IV_INC:%.+]] = add{{.+}} [[OMP_PF_IV_VAL_2]], 1 -// CHECK: store{{.+}} [[OMP_PF_IV_INC]], {{.+}}* [[OMP_PF_IV]], -// CHECK: br label %[[OMP_PF_JUMP_BACK]] - -// CHECK-DAG: call void @__kmpc_for_static_fini( -// CHECK: ret - -// CHECK: define{{.+}} void [[OFFLOADING_FUN_3]]( -// CHECK: call {{.*}}void {{.+}} @__kmpc_fork_teams({{.+}}, i32 5, {{.+}}* [[OMP_OUTLINED_3:@.+]] to {{.+}}) - -// CHECK: define{{.+}} void [[OMP_OUTLINED_3]]( -// CHECK-DAG: [[OMP_IV:%.omp.iv]] = alloca -// CHECK-DAG: [[OMP_LB:%.omp.comb.lb]] = alloca -// CHECK-DAG: [[OMP_UB:%.omp.comb.ub]] = alloca -// CHECK-DAG: [[OMP_ST:%.omp.stride]] = alloca - -// unlike the previous tests, in this one we have a outer and inner loop for 'distribute' -// CHECK: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, i32 91, -// CHECK: br label %[[DIST_OUTER_LOOP_HEADER:.+]] - -// CHECK: [[DIST_OUTER_LOOP_HEADER]]: -// check EUB for distribute -// CHECK-DAG: [[OMP_UB_VAL_1:%.+]] = load{{.+}} [[OMP_UB]], -// CHECK: [[NUM_IT_1:%.+]] = load{{.+}}, -// CHECK-DAG: [[CMP_UB_NUM_IT:%.+]] = icmp sgt {{.+}} [[OMP_UB_VAL_1]], [[NUM_IT_1]] -// CHECK: br {{.+}} [[CMP_UB_NUM_IT]], label %[[EUB_TRUE:.+]], label %[[EUB_FALSE:.+]] -// CHECK-DAG: [[EUB_TRUE]]: -// CHECK: [[NUM_IT_2:%.+]] = load{{.+}}, -// CHECK: br label %[[EUB_END:.+]] -// CHECK-DAG: [[EUB_FALSE]]: -// CHECK: [[OMP_UB_VAL2:%.+]] = load{{.+}} [[OMP_UB]], -// CHECK: br label %[[EUB_END]] -// CHECK-DAG: [[EUB_END]]: -// CHECK-DAG: [[EUB_RES:%.+]] = phi{{.+}} [ [[NUM_IT_2]], %[[EUB_TRUE]] ], [ [[OMP_UB_VAL2]], %[[EUB_FALSE]] ] -// CHECK: store{{.+}} [[EUB_RES]], {{.+}}* [[OMP_UB]], - -// initialize omp.iv -// CHECK: [[OMP_LB_VAL_1:%.+]] = load{{.+}}, {{.+}}* [[OMP_LB]], -// CHECK: store {{.+}} [[OMP_LB_VAL_1]], {{.+}}* [[OMP_IV]], - -// check exit condition -// CHECK-DAG: [[OMP_IV_VAL_1:%.+]] = load {{.+}} [[OMP_IV]], -// CHECK-DAG: [[OMP_UB_VAL_3:%.+]] = load {{.+}} [[OMP_UB]], -// CHECK: [[CMP_IV_UB:%.+]] = icmp sle {{.+}} [[OMP_IV_VAL_1]], [[OMP_UB_VAL_3]] -// CHECK: br {{.+}} [[CMP_IV_UB]], label %[[DIST_OUTER_LOOP_BODY:.+]], label %[[DIST_OUTER_LOOP_END:.+]] - -// CHECK: [[DIST_OUTER_LOOP_BODY]]: -// CHECK: br label %[[DIST_INNER_LOOP_HEADER:.+]] - -// CHECK: [[DIST_INNER_LOOP_HEADER]]: -// CHECK-DAG: [[OMP_IV_VAL_2:%.+]] = load {{.+}} [[OMP_IV]], -// CHECK-DAG: [[OMP_UB_VAL_4:%.+]] = load {{.+}} [[OMP_UB]], -// CHECK: [[CMP_IV_UB_2:%.+]] = icmp sle {{.+}} [[OMP_IV_VAL_2]], [[OMP_UB_VAL_4]] -// CHECK: br{{.+}} [[CMP_IV_UB_2]], label %[[DIST_INNER_LOOP_BODY:.+]], label %[[DIST_INNER_LOOP_END:.+]] - -// check that PrevLB and PrevUB are passed to the 'for' -// CHECK: [[DIST_INNER_LOOP_BODY]]: -// CHECK-DAG: [[OMP_PREV_LB:%.+]] = load {{.+}}, {{.+}} [[OMP_LB]], -// CHECK-64-DAG: [[OMP_PREV_LB_EXT:%.+]] = zext {{.+}} [[OMP_PREV_LB]] to {{.+}} -// CHECK-DAG: [[OMP_PREV_UB:%.+]] = load {{.+}}, {{.+}} [[OMP_UB]], -// CHECK-64-DAG: [[OMP_PREV_UB_EXT:%.+]] = zext {{.+}} [[OMP_PREV_UB]] to {{.+}} -// check that distlb and distub are properly passed to fork_call -// CHECK-64: call{{.+}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}}[[OMP_PARFOR_OUTLINED_3:@.+]] to {{.+}}, i{{[0-9]+}} [[OMP_PREV_LB_EXT]], i{{[0-9]+}} [[OMP_PREV_UB_EXT]], {{.+}}) -// CHECK-32: call{{.+}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}}[[OMP_PARFOR_OUTLINED_3:@.+]] to {{.+}}, i{{[0-9]+}} [[OMP_PREV_LB]], i{{[0-9]+}} [[OMP_PREV_UB]], {{.+}}) -// CHECK: br label %[[DIST_INNER_LOOP_INC:.+]] - -// check DistInc -// CHECK: [[DIST_INNER_LOOP_INC]]: -// CHECK-DAG: [[OMP_IV_VAL_3:%.+]] = load {{.+}}, {{.+}}* [[OMP_IV]], -// CHECK-DAG: [[OMP_ST_VAL_1:%.+]] = load {{.+}}, {{.+}}* [[OMP_ST]], -// CHECK: [[OMP_IV_INC:%.+]] = add{{.+}} [[OMP_IV_VAL_3]], [[OMP_ST_VAL_1]] -// CHECK: store{{.+}} [[OMP_IV_INC]], {{.+}}* [[OMP_IV]], -// CHECK: br label %[[DIST_INNER_LOOP_HEADER]] - -// CHECK: [[DIST_INNER_LOOP_END]]: -// CHECK: br label %[[DIST_OUTER_LOOP_INC:.+]] - -// CHECK: [[DIST_OUTER_LOOP_INC]]: -// check NextLB and NextUB -// CHECK-DAG: [[OMP_LB_VAL_2:%.+]] = load{{.+}}, {{.+}} [[OMP_LB]], -// CHECK-DAG: [[OMP_ST_VAL_2:%.+]] = load{{.+}}, {{.+}} [[OMP_ST]], -// CHECK-DAG: [[OMP_LB_NEXT:%.+]] = add{{.+}} [[OMP_LB_VAL_2]], [[OMP_ST_VAL_2]] -// CHECK: store{{.+}} [[OMP_LB_NEXT]], {{.+}}* [[OMP_LB]], -// CHECK-DAG: [[OMP_UB_VAL_5:%.+]] = load{{.+}}, {{.+}} [[OMP_UB]], -// CHECK-DAG: [[OMP_ST_VAL_3:%.+]] = load{{.+}}, {{.+}} [[OMP_ST]], -// CHECK-DAG: [[OMP_UB_NEXT:%.+]] = add{{.+}} [[OMP_UB_VAL_5]], [[OMP_ST_VAL_3]] -// CHECK: store{{.+}} [[OMP_UB_NEXT]], {{.+}}* [[OMP_UB]], -// CHECK: br label %[[DIST_OUTER_LOOP_HEADER]] - -// outer loop exit -// CHECK: [[DIST_OUTER_LOOP_END]]: -// CHECK-DAG: call void @__kmpc_for_static_fini( -// CHECK: ret - -// skip implementation of 'parallel for': using default scheduling and was tested above - -// CHECK: define{{.+}} void [[OFFLOADING_FUN_4]]( -// CHECK: call {{.*}}void {{.+}} @__kmpc_fork_teams({{.+}}, i32 4, {{.+}}* [[OMP_OUTLINED_4:@.+]] to {{.+}}) - -// CHECK: define{{.+}} void [[OMP_OUTLINED_4]]( -// CHECK-DAG: [[OMP_IV:%.omp.iv]] = alloca -// CHECK-DAG: [[OMP_LB:%.omp.comb.lb]] = alloca -// CHECK-DAG: [[OMP_UB:%.omp.comb.ub]] = alloca -// CHECK-DAG: [[OMP_ST:%.omp.stride]] = alloca - -// CHECK: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, i32 92, -// CHECK: call{{.+}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}}[[OMP_PARFOR_OUTLINED_4:@.+]] to {{.+}}, -// skip rest of implementation of 'distribute' as it is tested above for default dist_schedule case -// CHECK: ret - -// 'parallel for' implementation is the same as the case without schedule clase (static no chunk is the default) -// CHECK: define{{.+}} void [[OMP_PARFOR_OUTLINED_4]]({{.+}}, {{.+}}, i{{[0-9]+}} [[OMP_PREV_LB_IN:%.+]], i{{[0-9]+}} [[OMP_PREV_UB_IN:%.+]], {{.+}}, {{.+}}, {{.+}}, {{.+}}) - -// CHECK-DAG: [[OMP_PF_LB:%.omp.lb]] = alloca{{.+}}, -// CHECK-DAG: [[OMP_PF_UB:%.omp.ub]] = alloca{{.+}}, -// CHECK-DAG: [[OMP_PF_IV:%.omp.iv]] = alloca{{.+}}, - -// initialize lb and ub to PrevLB and PrevUB -// CHECK-DAG: store{{.+}} [[OMP_PREV_LB_IN]], {{.+}}* [[PREV_LB_ADDR:%.+]], -// CHECK-DAG: store{{.+}} [[OMP_PREV_UB_IN]], {{.+}}* [[PREV_UB_ADDR:%.+]], -// CHECK-DAG: [[OMP_PREV_LB_VAL:%.+]] = load{{.+}}, {{.+}}* [[PREV_LB_ADDR]], -// CHECK-64-DAG: [[OMP_PREV_LB_TRC:%.+]] = trunc{{.+}} [[OMP_PREV_LB_VAL]] to {{.+}} -// CHECK-DAG: [[OMP_PREV_UB_VAL:%.+]] = load{{.+}}, {{.+}}* [[PREV_UB_ADDR]], -// CHECK-64-DAG: [[OMP_PREV_UB_TRC:%.+]] = trunc{{.+}} [[OMP_PREV_UB_VAL]] to {{.+}} -// CHECK-64-DAG: store{{.+}} [[OMP_PREV_LB_TRC]], {{.+}}* [[OMP_PF_LB]], -// CHECK-32-DAG: store{{.+}} [[OMP_PREV_LB_VAL]], {{.+}}* [[OMP_PF_LB]], -// CHECK-64-DAG: store{{.+}} [[OMP_PREV_UB_TRC]], {{.+}}* [[OMP_PF_UB]], -// CHECK-32-DAG: store{{.+}} [[OMP_PREV_UB_VAL]], {{.+}}* [[OMP_PF_UB]], -// CHECK: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, {{.+}} 34, {{.+}}, {{.+}}* [[OMP_PF_LB]], {{.+}}* [[OMP_PF_UB]],{{.+}}) - -// PrevEUB is only used when 'for' has a chunked schedule, otherwise EUB is used -// In this case we use EUB -// CHECK-DAG: [[OMP_PF_UB_VAL_1:%.+]] = load{{.+}} [[OMP_PF_UB]], -// CHECK: [[PF_NUM_IT_1:%.+]] = load{{.+}}, -// CHECK-DAG: [[PF_CMP_UB_NUM_IT:%.+]] = icmp{{.+}} [[OMP_PF_UB_VAL_1]], [[PF_NUM_IT_1]] -// CHECK: br i1 [[PF_CMP_UB_NUM_IT]], label %[[PF_EUB_TRUE:.+]], label %[[PF_EUB_FALSE:.+]] -// CHECK: [[PF_EUB_TRUE]]: -// CHECK: [[PF_NUM_IT_2:%.+]] = load{{.+}}, -// CHECK: br label %[[PF_EUB_END:.+]] -// CHECK-DAG: [[PF_EUB_FALSE]]: -// CHECK: [[OMP_PF_UB_VAL2:%.+]] = load{{.+}} [[OMP_PF_UB]], -// CHECK: br label %[[PF_EUB_END]] -// CHECK-DAG: [[PF_EUB_END]]: -// CHECK-DAG: [[PF_EUB_RES:%.+]] = phi{{.+}} [ [[PF_NUM_IT_2]], %[[PF_EUB_TRUE]] ], [ [[OMP_PF_UB_VAL2]], %[[PF_EUB_FALSE]] ] -// CHECK: store{{.+}} [[PF_EUB_RES]],{{.+}} [[OMP_PF_UB]], - -// initialize omp.iv -// CHECK: [[OMP_PF_LB_VAL_1:%.+]] = load{{.+}}, {{.+}} [[OMP_PF_LB]], -// CHECK: store {{.+}} [[OMP_PF_LB_VAL_1]], {{.+}}* [[OMP_PF_IV]], -// CHECK: br label %[[OMP_PF_JUMP_BACK:.+]] - -// check exit condition -// CHECK: [[OMP_PF_JUMP_BACK]]: -// CHECK-DAG: [[OMP_PF_IV_VAL_1:%.+]] = load {{.+}} [[OMP_PF_IV]], -// CHECK-DAG: [[OMP_PF_UB_VAL_3:%.+]] = load {{.+}} [[OMP_PF_UB]], -// CHECK: [[PF_CMP_IV_UB:%.+]] = icmp sle {{.+}} [[OMP_PF_IV_VAL_1]], [[OMP_PF_UB_VAL_3]] -// CHECK: br {{.+}} [[PF_CMP_IV_UB]], label %[[PF_BODY:.+]], label %[[PF_END:.+]] - -// check that PrevLB and PrevUB are passed to the 'for' -// CHECK: [[PF_BODY]]: -// CHECK-DAG: {{.+}} = load{{.+}}, {{.+}}* [[OMP_PF_IV]], -// CHECK: br label {{.+}} - -// check stride 1 for 'for' in 'distribute parallel for' -// CHECK-DAG: [[OMP_PF_IV_VAL_2:%.+]] = load {{.+}}, {{.+}}* [[OMP_PF_IV]], -// CHECK: [[OMP_PF_IV_INC:%.+]] = add{{.+}} [[OMP_PF_IV_VAL_2]], 1 -// CHECK: store{{.+}} [[OMP_PF_IV_INC]], {{.+}}* [[OMP_PF_IV]], -// CHECK: br label %[[OMP_PF_JUMP_BACK]] - -// CHECK-DAG: call void @__kmpc_for_static_fini( -// CHECK: ret - -// CHECK: define{{.+}} void [[OFFLOADING_FUN_5]]( -// CHECK: call {{.*}}void {{.+}} @__kmpc_fork_teams({{.+}}, i32 5, {{.+}}* [[OMP_OUTLINED_5:@.+]] to {{.+}}) - -// CHECK: define{{.+}} void [[OMP_OUTLINED_5]]( -// CHECK: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, i32 92, -// CHECK: call{{.+}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}}[[OMP_PARFOR_OUTLINED_5:@.+]] to {{.+}}, -// skip rest of implementation of 'distribute' as it is tested above for default dist_schedule case -// CHECK: ret - -// 'parallel for' implementation using outer and inner loops and PrevEUB -// CHECK: define{{.+}} void [[OMP_PARFOR_OUTLINED_5]]({{.+}}, {{.+}}, i{{[0-9]+}} [[OMP_PREV_LB_IN:%.+]], i{{[0-9]+}} [[OMP_PREV_UB_IN:%.+]], {{.+}}, {{.+}}, {{.+}}, {{.+}}, {{.+}}) -// CHECK-DAG: [[OMP_PF_LB:%.omp.lb]] = alloca{{.+}}, -// CHECK-DAG: [[OMP_PF_UB:%.omp.ub]] = alloca{{.+}}, -// CHECK-DAG: [[OMP_PF_IV:%.omp.iv]] = alloca{{.+}}, -// CHECK-DAG: [[OMP_PF_ST:%.omp.stride]] = alloca{{.+}}, - -// initialize lb and ub to PrevLB and PrevUB -// CHECK-DAG: store{{.+}} [[OMP_PREV_LB_IN]], {{.+}}* [[PREV_LB_ADDR:%.+]], -// CHECK-DAG: store{{.+}} [[OMP_PREV_UB_IN]], {{.+}}* [[PREV_UB_ADDR:%.+]], -// CHECK-DAG: [[OMP_PREV_LB_VAL:%.+]] = load{{.+}}, {{.+}}* [[PREV_LB_ADDR]], -// CHECK-64-DAG: [[OMP_PREV_LB_TRC:%.+]] = trunc{{.+}} [[OMP_PREV_LB_VAL]] to {{.+}} -// CHECK-DAG: [[OMP_PREV_UB_VAL:%.+]] = load{{.+}}, {{.+}}* [[PREV_UB_ADDR]], -// CHECK-64-DAG: [[OMP_PREV_UB_TRC:%.+]] = trunc{{.+}} [[OMP_PREV_UB_VAL]] to {{.+}} -// CHECK-64-DAG: store{{.+}} [[OMP_PREV_LB_TRC]], {{.+}}* [[OMP_PF_LB]], -// CHECK-32-DAG: store{{.+}} [[OMP_PREV_LB_VAL]], {{.+}}* [[OMP_PF_LB]], -// CHECK-64-DAG: store{{.+}} [[OMP_PREV_UB_TRC]], {{.+}}* [[OMP_PF_UB]], -// CHECK-32-DAG: store{{.+}} [[OMP_PREV_UB_VAL]], {{.+}}* [[OMP_PF_UB]], -// CHECK: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, {{.+}} 33, {{.+}}, {{.+}}* [[OMP_PF_LB]], {{.+}}* [[OMP_PF_UB]],{{.+}}) -// CHECK: br label %[[OMP_PF_OUTER_LOOP_HEADER:.+]] - -// check PrevEUB (using PrevUB instead of NumIt as upper bound) -// CHECK: [[OMP_PF_OUTER_LOOP_HEADER]]: -// CHECK-DAG: [[OMP_PF_UB_VAL_1:%.+]] = load{{.+}} [[OMP_PF_UB]], -// CHECK-64-DAG: [[OMP_PF_UB_VAL_CONV:%.+]] = sext{{.+}} [[OMP_PF_UB_VAL_1]] to -// CHECK: [[PF_PREV_UB_VAL_1:%.+]] = load{{.+}}, {{.+}}* [[PREV_UB_ADDR]], -// CHECK-64-DAG: [[PF_CMP_UB_NUM_IT:%.+]] = icmp{{.+}} [[OMP_PF_UB_VAL_CONV]], [[PF_PREV_UB_VAL_1]] -// CHECK-32-DAG: [[PF_CMP_UB_NUM_IT:%.+]] = icmp{{.+}} [[OMP_PF_UB_VAL_1]], [[PF_PREV_UB_VAL_1]] -// CHECK: br i1 [[PF_CMP_UB_NUM_IT]], label %[[PF_EUB_TRUE:.+]], label %[[PF_EUB_FALSE:.+]] -// CHECK: [[PF_EUB_TRUE]]: -// CHECK: [[PF_PREV_UB_VAL_2:%.+]] = load{{.+}}, {{.+}}* [[PREV_UB_ADDR]], -// CHECK: br label %[[PF_EUB_END:.+]] -// CHECK-DAG: [[PF_EUB_FALSE]]: -// CHECK: [[OMP_PF_UB_VAL_2:%.+]] = load{{.+}} [[OMP_PF_UB]], -// CHECK-64: [[OMP_PF_UB_VAL_2_CONV:%.+]] = sext{{.+}} [[OMP_PF_UB_VAL_2]] to -// CHECK: br label %[[PF_EUB_END]] -// CHECK-DAG: [[PF_EUB_END]]: -// CHECK-64-DAG: [[PF_EUB_RES:%.+]] = phi{{.+}} [ [[PF_PREV_UB_VAL_2]], %[[PF_EUB_TRUE]] ], [ [[OMP_PF_UB_VAL_2_CONV]], %[[PF_EUB_FALSE]] ] -// CHECK-32-DAG: [[PF_EUB_RES:%.+]] = phi{{.+}} [ [[PF_PREV_UB_VAL_2]], %[[PF_EUB_TRUE]] ], [ [[OMP_PF_UB_VAL_2]], %[[PF_EUB_FALSE]] ] -// CHECK-64-DAG: [[PF_EUB_RES_CONV:%.+]] = trunc{{.+}} [[PF_EUB_RES]] to -// CHECK-64: store{{.+}} [[PF_EUB_RES_CONV]],{{.+}} [[OMP_PF_UB]], -// CHECK-32: store{{.+}} [[PF_EUB_RES]], {{.+}} [[OMP_PF_UB]], - -// initialize omp.iv (IV = LB) -// CHECK: [[OMP_PF_LB_VAL_1:%.+]] = load{{.+}}, {{.+}} [[OMP_PF_LB]], -// CHECK: store {{.+}} [[OMP_PF_LB_VAL_1]], {{.+}}* [[OMP_PF_IV]], - -// outer loop: while (IV < UB) { -// CHECK-DAG: [[OMP_PF_IV_VAL_1:%.+]] = load{{.+}}, {{.+}}* [[OMP_PF_IV]], -// CHECK-DAG: [[OMP_PF_UB_VAL_3:%.+]] = load{{.+}}, {{.+}}* [[OMP_PF_UB]], -// CHECK: [[PF_CMP_IV_UB_1:%.+]] = icmp{{.+}} [[OMP_PF_IV_VAL_1]], [[OMP_PF_UB_VAL_3]] -// CHECK: br{{.+}} [[PF_CMP_IV_UB_1]], label %[[OMP_PF_OUTER_LOOP_BODY:.+]], label %[[OMP_PF_OUTER_LOOP_END:.+]] - -// CHECK: [[OMP_PF_OUTER_LOOP_BODY]]: -// CHECK: br label %[[OMP_PF_INNER_FOR_HEADER:.+]] - -// CHECK: [[OMP_PF_INNER_FOR_HEADER]]: -// CHECK-DAG: [[OMP_PF_IV_VAL_2:%.+]] = load{{.+}}, {{.+}}* [[OMP_PF_IV]], -// CHECK-DAG: [[OMP_PF_UB_VAL_4:%.+]] = load{{.+}}, {{.+}}* [[OMP_PF_UB]], -// CHECK: [[PF_CMP_IV_UB_2:%.+]] = icmp{{.+}} [[OMP_PF_IV_VAL_2]], [[OMP_PF_UB_VAL_4]] -// CHECK: br{{.+}} [[PF_CMP_IV_UB_2]], label %[[OMP_PF_INNER_LOOP_BODY:.+]], label %[[OMP_PF_INNER_LOOP_END:.+]] - -// CHECK: [[OMP_PF_INNER_LOOP_BODY]]: -// CHECK-DAG: {{.+}} = load{{.+}}, {{.+}}* [[OMP_PF_IV]], -// skip body branch -// CHECK: br{{.+}} -// CHECK: br label %[[OMP_PF_INNER_LOOP_INC:.+]] - -// IV = IV + 1 and inner loop latch -// CHECK: [[OMP_PF_INNER_LOOP_INC]]: -// CHECK-DAG: [[OMP_PF_IV_VAL_3:%.+]] = load{{.+}}, {{.+}}* [[OMP_IV]], -// CHECK-DAG: [[OMP_PF_NEXT_IV:%.+]] = add{{.+}} [[OMP_PF_IV_VAL_3]], 1 -// CHECK-DAG: store{{.+}} [[OMP_PF_NEXT_IV]], {{.+}}* [[OMP_IV]], -// CHECK: br label %[[OMP_PF_INNER_FOR_HEADER]] - -// check NextLB and NextUB -// CHECK: [[OMP_PF_INNER_LOOP_END]]: -// CHECK: br label %[[OMP_PF_OUTER_LOOP_INC:.+]] - -// CHECK: [[OMP_PF_OUTER_LOOP_INC]]: -// CHECK-DAG: [[OMP_PF_LB_VAL_2:%.+]] = load{{.+}}, {{.+}} [[OMP_PF_LB]], -// CHECK-DAG: [[OMP_PF_ST_VAL_1:%.+]] = load{{.+}}, {{.+}} [[OMP_PF_ST]], -// CHECK-DAG: [[OMP_PF_LB_NEXT:%.+]] = add{{.+}} [[OMP_PF_LB_VAL_2]], [[OMP_PF_ST_VAL_1]] -// CHECK: store{{.+}} [[OMP_PF_LB_NEXT]], {{.+}}* [[OMP_PF_LB]], -// CHECK-DAG: [[OMP_PF_UB_VAL_5:%.+]] = load{{.+}}, {{.+}} [[OMP_PF_UB]], -// CHECK-DAG: [[OMP_PF_ST_VAL_2:%.+]] = load{{.+}}, {{.+}} [[OMP_PF_ST]], -// CHECK-DAG: [[OMP_PF_UB_NEXT:%.+]] = add{{.+}} [[OMP_PF_UB_VAL_5]], [[OMP_PF_ST_VAL_2]] -// CHECK: store{{.+}} [[OMP_PF_UB_NEXT]], {{.+}}* [[OMP_PF_UB]], -// CHECK: br label %[[OMP_PF_OUTER_LOOP_HEADER]] - -// CHECK: [[OMP_PF_OUTER_LOOP_END]]: -// CHECK-DAG: call void @__kmpc_for_static_fini( -// CHECK: ret - -// CHECK: define{{.+}} void [[OFFLOADING_FUN_6]]( -// CHECK: call {{.*}}void {{.+}} @__kmpc_fork_teams({{.+}}, i32 4, {{.+}}* [[OMP_OUTLINED_6:@.+]] to {{.+}}) - -// CHECK: define{{.+}} void [[OMP_OUTLINED_6]]( -// CHECK: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, i32 92, -// CHECK: call{{.+}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}}[[OMP_PARFOR_OUTLINED_6:@.+]] to {{.+}}, -// skip rest of implementation of 'distribute' as it is tested above for default dist_schedule case -// CHECK: ret - -// 'parallel for' implementation using outer and inner loops and PrevEUB -// CHECK: define{{.+}} void [[OMP_PARFOR_OUTLINED_6]]({{.+}}, {{.+}}, i{{[0-9]+}} [[OMP_PREV_LB_IN:%.+]], i{{[0-9]+}} [[OMP_PREV_UB_IN:%.+]], {{.+}}, {{.+}}, {{.+}}, {{.+}}) -// CHECK-DAG: [[OMP_PF_LB:%.omp.lb]] = alloca{{.+}}, -// CHECK-DAG: [[OMP_PF_UB:%.omp.ub]] = alloca{{.+}}, -// CHECK-DAG: [[OMP_PF_IV:%.omp.iv]] = alloca{{.+}}, -// CHECK-DAG: [[OMP_PF_ST:%.omp.stride]] = alloca{{.+}}, - -// initialize lb and ub to PrevLB and PrevUB -// CHECK-DAG: store{{.+}} [[OMP_PREV_LB_IN]], {{.+}}* [[PREV_LB_ADDR:%.+]], -// CHECK-DAG: store{{.+}} [[OMP_PREV_UB_IN]], {{.+}}* [[PREV_UB_ADDR:%.+]], -// CHECK-DAG: [[OMP_PREV_LB_VAL:%.+]] = load{{.+}}, {{.+}}* [[PREV_LB_ADDR]], -// CHECK-64-DAG: [[OMP_PREV_LB_TRC:%.+]] = trunc{{.+}} [[OMP_PREV_LB_VAL]] to {{.+}} -// CHECK-DAG: [[OMP_PREV_UB_VAL:%.+]] = load{{.+}}, {{.+}}* [[PREV_UB_ADDR]], -// CHECK-64-DAG: [[OMP_PREV_UB_TRC:%.+]] = trunc{{.+}} [[OMP_PREV_UB_VAL]] to {{.+}} -// CHECK-64-DAG: store{{.+}} [[OMP_PREV_LB_TRC]], {{.+}}* [[OMP_PF_LB]], -// CHECK-64-DAG: store{{.+}} [[OMP_PREV_UB_TRC]], {{.+}}* [[OMP_PF_UB]], -// CHECK-32-DAG: store{{.+}} [[OMP_PREV_LB_VAL]], {{.+}}* [[OMP_PF_LB]], -// CHECK-32-DAG: store{{.+}} [[OMP_PREV_UB_VAL]], {{.+}}* [[OMP_PF_UB]], -// CHECK-DAG: [[OMP_PF_LB_VAL:%.+]] = load{{.+}}, {{.+}} [[OMP_PF_LB]], -// CHECK-DAG: [[OMP_PF_UB_VAL:%.+]] = load{{.+}}, {{.+}} [[OMP_PF_UB]], -// CHECK: call void @__kmpc_dispatch_init_4({{.+}}, {{.+}}, {{.+}} 35, {{.+}} [[OMP_PF_LB_VAL]], {{.+}} [[OMP_PF_UB_VAL]], {{.+}}, {{.+}}) -// CHECK: br label %[[OMP_PF_OUTER_LOOP_HEADER:.+]] - -// CHECK: [[OMP_PF_OUTER_LOOP_HEADER]]: -// CHECK: [[IS_FIN:%.+]] = call{{.+}} @__kmpc_dispatch_next_4({{.+}}, {{.+}}, {{.+}}, {{.+}}* [[OMP_PF_LB]], {{.+}}* [[OMP_PF_UB]], {{.+}}* [[OMP_PF_ST]]) -// CHECK: [[IS_FIN_CMP:%.+]] = icmp{{.+}} [[IS_FIN]], 0 -// CHECK: br{{.+}} [[IS_FIN_CMP]], label %[[OMP_PF_OUTER_LOOP_BODY:.+]], label %[[OMP_PF_OUTER_LOOP_END:.+]] - -// initialize omp.iv (IV = LB) -// CHECK: [[OMP_PF_OUTER_LOOP_BODY]]: -// CHECK-DAG: [[OMP_PF_LB_VAL_1:%.+]] = load{{.+}}, {{.+}} [[OMP_PF_LB]], -// CHECK-DAG: store {{.+}} [[OMP_PF_LB_VAL_1]], {{.+}}* [[OMP_PF_IV]], -// CHECK: br label %[[OMP_PF_INNER_LOOP_HEADER:.+]] - -// CHECK: [[OMP_PF_INNER_LOOP_HEADER]]: -// CHECK-DAG: [[OMP_PF_IV_VAL_2:%.+]] = load{{.+}}, {{.+}}* [[OMP_PF_IV]], -// CHECK-DAG: [[OMP_PF_UB_VAL_4:%.+]] = load{{.+}}, {{.+}}* [[OMP_PF_UB]], -// CHECK: [[PF_CMP_IV_UB_2:%.+]] = icmp{{.+}} [[OMP_PF_IV_VAL_2]], [[OMP_PF_UB_VAL_4]] -// CHECK: br{{.+}} [[PF_CMP_IV_UB_2]], label %[[OMP_PF_INNER_LOOP_BODY:.+]], label %[[OMP_PF_INNER_LOOP_END:.+]] - -// CHECK: [[OMP_PF_INNER_LOOP_BODY]]: -// CHECK-DAG: {{.+}} = load{{.+}}, {{.+}}* [[OMP_PF_IV]], -// skip body branch -// CHECK: br{{.+}} -// CHECK: br label %[[OMP_PF_INNER_LOOP_INC:.+]] - -// IV = IV + 1 and inner loop latch -// CHECK: [[OMP_PF_INNER_LOOP_INC]]: -// CHECK-DAG: [[OMP_PF_IV_VAL_3:%.+]] = load{{.+}}, {{.+}}* [[OMP_IV]], -// CHECK-DAG: [[OMP_PF_NEXT_IV:%.+]] = add{{.+}} [[OMP_PF_IV_VAL_3]], 1 -// CHECK-DAG: store{{.+}} [[OMP_PF_NEXT_IV]], {{.+}}* [[OMP_IV]], -// CHECK: br label %[[OMP_PF_INNER_FOR_HEADER]] - -// check NextLB and NextUB -// CHECK: [[OMP_PF_INNER_LOOP_END]]: -// CHECK: br label %[[OMP_PF_OUTER_LOOP_INC:.+]] - -// CHECK: [[OMP_PF_OUTER_LOOP_INC]]: -// CHECK: br label %[[OMP_PF_OUTER_LOOP_HEADER]] - -// CHECK: [[OMP_PF_OUTER_LOOP_END]]: -// CHECK: ret - -// CHECK: define{{.+}} void [[OFFLOADING_FUN_7]]( -// CHECK: call {{.*}}void {{.+}} @__kmpc_fork_teams({{.+}}, i32 5, {{.+}}* [[OMP_OUTLINED_7:@.+]] to {{.+}}) - -// CHECK: define{{.+}} void [[OMP_OUTLINED_7]]( -// CHECK: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, i32 92, -// CHECK: call{{.+}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}}[[OMP_PARFOR_OUTLINED_7:@.+]] to {{.+}}, -// skip rest of implementation of 'distribute' as it is tested above for default dist_schedule case -// CHECK: ret - -// 'parallel for' implementation using outer and inner loops and PrevEUB -// CHECK: define{{.+}} void [[OMP_PARFOR_OUTLINED_7]]({{.+}}, {{.+}}, i{{[0-9]+}} [[OMP_PREV_LB_IN:%.+]], i{{[0-9]+}} [[OMP_PREV_UB_IN:%.+]], {{.+}}, {{.+}}, {{.+}}, {{.+}}, {{.+}}) -// CHECK-DAG: [[OMP_PF_LB:%.omp.lb]] = alloca{{.+}}, -// CHECK-DAG: [[OMP_PF_UB:%.omp.ub]] = alloca{{.+}}, -// CHECK-DAG: [[OMP_PF_IV:%.omp.iv]] = alloca{{.+}}, -// CHECK-DAG: [[OMP_PF_ST:%.omp.stride]] = alloca{{.+}}, - -// initialize lb and ub to PrevLB and PrevUB -// CHECK-DAG: store{{.+}} [[OMP_PREV_LB_IN]], {{.+}}* [[PREV_LB_ADDR:%.+]], -// CHECK-DAG: store{{.+}} [[OMP_PREV_UB_IN]], {{.+}}* [[PREV_UB_ADDR:%.+]], -// CHECK-DAG: [[OMP_PREV_LB_VAL:%.+]] = load{{.+}}, {{.+}}* [[PREV_LB_ADDR]], -// CHECK-64-DAG: [[OMP_PREV_LB_TRC:%.+]] = trunc{{.+}} [[OMP_PREV_LB_VAL]] to {{.+}} -// CHECK-DAG: [[OMP_PREV_UB_VAL:%.+]] = load{{.+}}, {{.+}}* [[PREV_UB_ADDR]], -// CHECK-64-DAG: [[OMP_PREV_UB_TRC:%.+]] = trunc{{.+}} [[OMP_PREV_UB_VAL]] to {{.+}} -// CHECK-64-DAG: store{{.+}} [[OMP_PREV_LB_TRC]], {{.+}}* [[OMP_PF_LB]], -// CHECK-64-DAG: store{{.+}} [[OMP_PREV_UB_TRC]], {{.+}}* [[OMP_PF_UB]], -// CHECK-32-DAG: store{{.+}} [[OMP_PREV_LB_VAL]], {{.+}}* [[OMP_PF_LB]], -// CHECK-32-DAG: store{{.+}} [[OMP_PREV_UB_VAL]], {{.+}}* [[OMP_PF_UB]], -// CHECK-DAG: [[OMP_PF_LB_VAL:%.+]] = load{{.+}}, {{.+}} [[OMP_PF_LB]], -// CHECK-DAG: [[OMP_PF_UB_VAL:%.+]] = load{{.+}}, {{.+}} [[OMP_PF_UB]], -// CHECK: call void @__kmpc_dispatch_init_4({{.+}}, {{.+}}, {{.+}} 35, {{.+}} [[OMP_PF_LB_VAL]], {{.+}} [[OMP_PF_UB_VAL]], {{.+}}, {{.+}}) -// CHECK: br label %[[OMP_PF_OUTER_LOOP_HEADER:.+]] - -// CHECK: [[OMP_PF_OUTER_LOOP_HEADER]]: -// CHECK: [[IS_FIN:%.+]] = call{{.+}} @__kmpc_dispatch_next_4({{.+}}, {{.+}}, {{.+}}, {{.+}}* [[OMP_PF_LB]], {{.+}}* [[OMP_PF_UB]], {{.+}}* [[OMP_PF_ST]]) -// CHECK: [[IS_FIN_CMP:%.+]] = icmp{{.+}} [[IS_FIN]], 0 -// CHECK: br{{.+}} [[IS_FIN_CMP]], label %[[OMP_PF_OUTER_LOOP_BODY:.+]], label %[[OMP_PF_OUTER_LOOP_END:.+]] - -// initialize omp.iv (IV = LB) -// CHECK: [[OMP_PF_OUTER_LOOP_BODY]]: -// CHECK-DAG: [[OMP_PF_LB_VAL_1:%.+]] = load{{.+}}, {{.+}} [[OMP_PF_LB]], -// CHECK-DAG: store {{.+}} [[OMP_PF_LB_VAL_1]], {{.+}}* [[OMP_PF_IV]], -// CHECK: br label %[[OMP_PF_INNER_LOOP_HEADER:.+]] - -// CHECK: [[OMP_PF_INNER_LOOP_HEADER]]: -// CHECK-DAG: [[OMP_PF_IV_VAL_2:%.+]] = load{{.+}}, {{.+}}* [[OMP_PF_IV]], -// CHECK-DAG: [[OMP_PF_UB_VAL_4:%.+]] = load{{.+}}, {{.+}}* [[OMP_PF_UB]], -// CHECK: [[PF_CMP_IV_UB_2:%.+]] = icmp{{.+}} [[OMP_PF_IV_VAL_2]], [[OMP_PF_UB_VAL_4]] -// CHECK: br{{.+}} [[PF_CMP_IV_UB_2]], label %[[OMP_PF_INNER_LOOP_BODY:.+]], label %[[OMP_PF_INNER_LOOP_END:.+]] - -// CHECK: [[OMP_PF_INNER_LOOP_BODY]]: -// CHECK-DAG: {{.+}} = load{{.+}}, {{.+}}* [[OMP_PF_IV]], -// skip body branch -// CHECK: br{{.+}} -// CHECK: br label %[[OMP_PF_INNER_LOOP_INC:.+]] - -// IV = IV + 1 and inner loop latch -// CHECK: [[OMP_PF_INNER_LOOP_INC]]: -// CHECK-DAG: [[OMP_PF_IV_VAL_3:%.+]] = load{{.+}}, {{.+}}* [[OMP_IV]], -// CHECK-DAG: [[OMP_PF_NEXT_IV:%.+]] = add{{.+}} [[OMP_PF_IV_VAL_3]], 1 -// CHECK-DAG: store{{.+}} [[OMP_PF_NEXT_IV]], {{.+}}* [[OMP_IV]], -// CHECK: br label %[[OMP_PF_INNER_FOR_HEADER]] - -// check NextLB and NextUB -// CHECK: [[OMP_PF_INNER_LOOP_END]]: -// CHECK: br label %[[OMP_PF_OUTER_LOOP_INC:.+]] - -// CHECK: [[OMP_PF_OUTER_LOOP_INC]]: -// CHECK: br label %[[OMP_PF_OUTER_LOOP_HEADER]] - -// CHECK: [[OMP_PF_OUTER_LOOP_END]]: -// CHECK: ret -#endif diff --git a/test/OpenMP/distribute_parallel_for_firstprivate_codegen.cpp b/test/OpenMP/distribute_parallel_for_firstprivate_codegen.cpp deleted file mode 100644 index d12c0aa11c..0000000000 --- a/test/OpenMP/distribute_parallel_for_firstprivate_codegen.cpp +++ /dev/null @@ -1,619 +0,0 @@ -// RxUN: %clang_cc1 -DLAMBDA -verify -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix LAMBDA --check-prefix LAMBDA-64 -// RUN: %clang_cc1 -DLAMBDA -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s -// RUN: %clang_cc1 -DLAMBDA -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix LAMBDA --check-prefix LAMBDA-64 -// RUN: %clang_cc1 -DLAMBDA -verify -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix LAMBDA --check-prefix LAMBDA-32 -// RUN: %clang_cc1 -DLAMBDA -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -o %t %s -// RUN: %clang_cc1 -DLAMBDA -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix LAMBDA --check-prefix LAMBDA-32 - -// RUN: %clang_cc1 -verify -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-64 -// RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s -// RUN: %clang_cc1 -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-64 -// RUN: %clang_cc1 -verify -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-32 -// RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -o %t %s -// RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-32 -// expected-no-diagnostics -#ifndef HEADER -#define HEADER - -template -struct S { - T f; - S(T a) : f(a) {} - S() : f() {} - operator T() { return T(); } - ~S() {} -}; - -// CHECK: [[S_FLOAT_TY:%.+]] = type { float } -// CHECK: [[S_INT_TY:%.+]] = type { i{{[0-9]+}} } -template -T tmain() { - S test; - T t_var = T(); - T vec[] = {1, 2}; - S s_arr[] = {1, 2}; - S &var = test; - #pragma omp target - #pragma omp teams - #pragma omp distribute parallel for firstprivate(t_var, vec, s_arr, s_arr, var, var) - for (int i = 0; i < 2; ++i) { - vec[i] = t_var; - s_arr[i] = var; - } - return T(); -} - -int main() { - static int svar; - volatile double g; - volatile double &g1 = g; - - #ifdef LAMBDA - // LAMBDA-LABEL: @main - // LAMBDA: call{{.*}} void [[OUTER_LAMBDA:@.+]]( - [&]() { - static float sfvar; - // LAMBDA: define{{.*}} internal{{.*}} void [[OUTER_LAMBDA]]( - // LAMBDA: call i{{[0-9]+}} @__tgt_target_teams( - // LAMBDA: call void [[OFFLOADING_FUN:@.+]]( - - // LAMBDA: define{{.+}} void [[OFFLOADING_FUN]]( - // LAMBDA: call {{.*}}void {{.+}} @__kmpc_fork_teams({{.+}}, i32 4, {{.+}}* [[OMP_OUTLINED:@.+]] to {{.+}}) - #pragma omp target - #pragma omp teams - #pragma omp distribute parallel for firstprivate(g, g1, svar, sfvar) - for (int i = 0; i < 2; ++i) { - // LAMBDA-64: define{{.*}} internal{{.*}} void [[OMP_OUTLINED]](i32* noalias %{{.+}}, i32* noalias %{{.+}}, i{{[0-9]+}} [[G_IN:%.+]], i{{[0-9]+}} [[G1_IN:%.+]], i{{[0-9]+}} [[SVAR_IN:%.+]], i{{[0-9]+}} [[SFVAR_IN:%.+]]) - // LAMBDA-32: define{{.*}} internal{{.*}} void [[OMP_OUTLINED]](i32* noalias %{{.+}}, i32* noalias %{{.+}}, double* {{.+}} [[G_IN:%.+]], i{{[0-9]+}} [[G1_IN:%.+]], i{{[0-9]+}} [[SVAR_IN:%.+]], i{{[0-9]+}} [[SFVAR_IN:%.+]]) - - // addr alloca's - // LAMBDA-64: [[G_ADDR:%.+]] = alloca i{{[0-9]+}}, - // LAMBDA-32: [[G_ADDR:%.+]] = alloca double*, - // LAMBDA: [[G1_ADDR:%.+]] = alloca i{{[0-9]+}}, - // LAMBDA: [[SVAR_ADDR:%.+]] = alloca i{{[0-9]+}}, - // LAMBDA: [[SFVAR_ADDR:%.+]] = alloca i{{[0-9]+}}, - // LAMBDA: [[G1_REF:%.+]] = alloca double*, - // LAMBDA: [[TMP:%.+]] = alloca double*, - - // private alloca's - // LAMBDA: [[G_PRIV:%.+]] = alloca double, - // LAMBDA: [[G1_PRIV:%.+]] = alloca double, - // LAMBDA: [[TMP_PRIV:%.+]] = alloca double*, - // LAMBDA: [[SVAR_PRIV:%.+]] = alloca i{{[0-9]+}}, - // LAMBDA: [[SFVAR_PRIV:%.+]] = alloca float, - - // transfer input parameters into addr alloca's - // LAMBDA-DAG: store {{.+}} [[G_IN]], {{.+}} [[G_ADDR]], - // LAMBDA-DAG: store {{.+}} [[G1_IN]], {{.+}} [[G1_ADDR]], - // LAMBDA-DAG: store {{.+}} [[SVAR_IN]], {{.+}} [[SVAR_ADDR]], - // LAMBDA-DAG: store {{.+}} [[SFVAR_IN]], {{.+}} [[SFVAR_ADDR]], - - // init private alloca's with addr alloca's - // g - // LAMBDA-64-DAG: [[G_CONV:%.+]] = bitcast {{.+}}* [[G_ADDR]] to - // LAMBDA-32-DAG: [[G_CONV:%.+]] = load {{.+}}*, {{.+}}** [[G_ADDR]] - // LAMBDA-DAG: [[G_ADDR_VAL:%.+]] = load {{.+}}, {{.+}}* [[G_CONV]], - // LAMBDA-DAG: store {{.+}} [[G_ADDR_VAL]], {{.+}}* [[G_PRIV]], - - // g1 - // LAMBDA-DAG: [[G1_CONV:%.+]] = bitcast {{.+}}* [[G1_ADDR]] to - // LAMBDA-DAG: store {{.+}}* [[G1_CONV]], {{.+}}** [[G1_REF]], - // LAMBDA-DAG: [[G1_REF_VAL:%.+]] = load {{.+}}*, {{.+}}** [[G1_REF]], - // LAMBDA-DAG: store {{.+}}* [[G1_REF_VAL]], {{.+}}** [[TMP]], - // LAMBDA-DAG: [[TMP_REF:%.+]] = load {{.+}}*, {{.+}}** [[TMP]], - // LAMBDA-DAG: [[TMP_VAL:%.+]] = load {{.+}}, {{.+}}* [[TMP_REF]], - // LAMBDA-DAG: store {{.+}} [[TMP_VAL]], {{.+}}* [[G1_PRIV]] - // LAMBDA-DAG: store {{.+}}* [[G1_PRIV]], {{.+}}** [[TMP_PRIV]], - - // svar - // LAMBDA-64-DAG: [[SVAR_CONV:%.+]] = bitcast {{.+}}* [[SVAR_ADDR]] to - // LAMBDA-64-DAG: [[SVAR_VAL:%.+]] = load {{.+}}, {{.+}}* [[SVAR_CONV]], - // LAMBDA-32-DAG: [[SVAR_VAL:%.+]] = load {{.+}}, {{.+}}* [[SVAR_ADDR]], - // LAMBDA-DAG: store {{.+}} [[SVAR_VAL]], {{.+}}* [[SVAR_PRIV]], - - // sfvar - // LAMBDA-DAG: [[SFVAR_CONV:%.+]] = bitcast {{.+}}* [[SFVAR_ADDR]] to - // LAMBDA-DAG: [[SFVAR_VAL:%.+]] = load {{.+}}, {{.+}}* [[SFVAR_CONV]], - // LAMBDA-DAG: store {{.+}} [[SFVAR_VAL]], {{.+}}* [[SFVAR_PRIV]], - - // LAMBDA: call {{.*}}void @__kmpc_for_static_init_4( - // pass firstprivate parameters to parallel outlined function - // g - // LAMBDA-64-DAG: [[G_PRIV_VAL:%.+]] = load {{.+}}, {{.+}}* [[G_PRIV]], - // LAMBDA-64: [[G_CAST_CONV:%.+]] = bitcast {{.+}}* [[G_CAST:%.+]] to - // LAMBDA-64-DAG: store {{.+}} [[G_PRIV_VAL]], {{.+}}* [[G_CAST_CONV]], - // LAMBDA-64-DAG: [[G_PAR:%.+]] = load {{.+}}, {{.+}}* [[G_CAST]], - - // g1 - // LAMBDA-DAG: [[TMP_PRIV_VAL:%.+]] = load {{.+}}, {{.+}}* [[TMP_PRIV]], - // LAMBDA-DAG: [[G1_PRIV_VAL:%.+]] = load {{.+}}, {{.+}}* [[TMP_PRIV_VAL]], - // LAMBDA: [[G1_CAST_CONV:%.+]] = bitcast {{.+}}* [[G1_CAST:%.+]] to - // LAMBDA-DAG: store {{.+}} [[G1_PRIV_VAL]], {{.+}}* [[G1_CAST_CONV]], - // LAMBDA-DAG: [[G1_PAR:%.+]] = load {{.+}}, {{.+}}* [[G1_CAST]], - - // svar - // LAMBDA: [[SVAR_VAL:%.+]] = load {{.+}}, {{.+}}* [[SVAR_PRIV]], - // LAMBDA-64-DAG: [[SVAR_CAST_CONV:%.+]] = bitcast {{.+}}* [[SVAR_CAST:%.+]] to - // LAMBDA-64-DAG: store {{.+}} [[SVAR_VAL]], {{.+}}* [[SVAR_CAST_CONV]], - // LAMBDA-32-DAG: store {{.+}} [[SVAR_VAL]], {{.+}}* [[SVAR_CAST:%.+]], - // LAMBDA-DAG: [[SVAR_PAR:%.+]] = load {{.+}}, {{.+}}* [[SVAR_CAST]], - - // sfvar - // LAMBDA: [[SFVAR_VAL:%.+]] = load {{.+}}, {{.+}}* [[SFVAR_PRIV]], - // LAMBDA-DAG: [[SFVAR_CAST_CONV:%.+]] = bitcast {{.+}}* [[SFVAR_CAST:%.+]] to - // LAMBDA-DAG: store {{.+}} [[SFVAR_VAL]], {{.+}}* [[SFVAR_CAST_CONV]], - // LAMBDA-DAG: [[SFVAR_PAR:%.+]] = load {{.+}}, {{.+}}* [[SFVAR_CAST]], - - // LAMBDA-64: call{{.+}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}}[[OMP_PARFOR_OUTLINED:@.+]] to void ({{.+}})*), {{.+}}, {{.+}}, {{.+}} [[G_PAR]], {{.+}} [[G1_PAR]], {{.+}} [[SVAR_PAR]], {{.+}} [[SFVAR_PAR]]) - // LAMBDA-32: call{{.+}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}}[[OMP_PARFOR_OUTLINED:@.+]] to void ({{.+}})*), {{.+}}, {{.+}}, {{.+}} [[G_PRIV]], {{.+}} [[G1_PAR]], {{.+}} [[SVAR_PAR]], {{.+}} [[SFVAR_PAR]]) - // LAMBDA: call {{.*}}void @__kmpc_for_static_fini( - // LAMBDA: ret void - - - // LAMBDA-64: define{{.+}} void [[OMP_PARFOR_OUTLINED]](i32* noalias %{{.+}}, i32* noalias %{{.+}}, {{.+}}, {{.+}}, i{{[0-9]+}} [[G_IN:%.+]], i{{[0-9]+}} [[G1_IN:%.+]], i{{[0-9]+}} [[SVAR_IN:%.+]], i{{[0-9]+}} [[SFVAR_IN:%.+]]) - // LAMBDA-32: define{{.+}} void [[OMP_PARFOR_OUTLINED]](i32* noalias %{{.+}}, i32* noalias %{{.+}}, {{.+}}, {{.+}}, double* {{.+}} [[G_IN:%.+]], i{{[0-9]+}} [[G1_IN:%.+]], i{{[0-9]+}} [[SVAR_IN:%.+]], i{{[0-9]+}} [[SFVAR_IN:%.+]]) - // skip initial params - // LAMBDA: {{.+}} = alloca{{.+}}, - // LAMBDA: {{.+}} = alloca{{.+}}, - // LAMBDA: {{.+}} = alloca{{.+}}, - // LAMBDA: {{.+}} = alloca{{.+}}, - - // addr alloca's - // LAMBDA-64: [[G_ADDR:%.+]] = alloca i{{[0-9]+}}, - // LAMBDA-32: [[G_ADDR:%.+]] = alloca double*, - // LAMBDA: [[G1_ADDR:%.+]] = alloca i{{[0-9]+}}, - // LAMBDA: [[SVAR_ADDR:%.+]] = alloca i{{[0-9]+}}, - // LAMBDA: [[SFVAR_ADDR:%.+]] = alloca i{{[0-9]+}}, - // LAMBDA: [[G1_REF:%.+]] = alloca double*, - - // private alloca's (only for 32-bit) - // LAMBDA-32: [[G_PRIV:%.+]] = alloca double, - - // transfer input parameters into addr alloca's - // LAMBDA-DAG: store {{.+}} [[G_IN]], {{.+}} [[G_ADDR]], - // LAMBDA-DAG: store {{.+}} [[G1_IN]], {{.+}} [[G1_ADDR]], - // LAMBDA-DAG: store {{.+}} [[SVAR_IN]], {{.+}} [[SVAR_ADDR]], - // LAMBDA-DAG: store {{.+}} [[SFVAR_IN]], {{.+}} [[SFVAR_ADDR]], - - // prepare parameters for lambda - // g - // LAMBDA-64-DAG: [[G_CONV:%.+]] = bitcast {{.+}}* [[G_ADDR]] to - // LAMBDA-32-DAG: [[G_ADDR_REF:%.+]] = load {{.+}}*, {{.+}}** [[G_ADDR]] - // LAMBDA-32-DAG: [[G_ADDR_VAL:%.+]] = load {{.+}}, {{.+}}* [[G_ADDR_REF]], - // LAMBDA-32-DAG: store {{.+}} [[G_ADDR_VAL]], {{.+}}* [[G_PRIV]], - - // g1 - // LAMBDA-DAG: [[G1_CONV:%.+]] = bitcast {{.+}}* [[G1_ADDR]] to - // LAMBDA-DAG: store {{.+}}* [[G1_CONV]], {{.+}}* [[G1_REF]], - - // svar - // LAMBDA-64-DAG: [[SVAR_CONV:%.+]] = bitcast {{.+}}* [[SVAR_ADDR]] to - - // sfvar - // LAMBDA-DAG: [[SFVAR_CONV:%.+]] = bitcast {{.+}}* [[SFVAR_ADDR]] to - - // LAMBDA: call {{.*}}void @__kmpc_for_static_init_4( - g = 1; - g1 = 1; - svar = 3; - sfvar = 4.0; - // LAMBDA-64: store double 1.0{{.+}}, double* [[G_CONV]], - // LAMBDA-32: store double 1.0{{.+}}, double* [[G_PRIV]], - // LAMBDA: [[G1_REF_REF:%.+]] = load {{.+}}*, {{.+}}** [[G1_REF]], - // LAMBDA: store {{.+}} 1.0{{.+}}, {{.+}}* [[G1_REF_REF]], - // LAMBDA-64: store {{.+}} 3, {{.+}}* [[SVAR_CONV]], - // LAMBDA-32: store {{.+}} 3, {{.+}}* [[SVAR_ADDR]], - // LAMBDA: store {{.+}} 4.0{{.+}}, {{.+}}* [[SFVAR_CONV]], - - // pass params to inner lambda - // LAMBDA: [[G_PRIVATE_ADDR_REF:%.+]] = getelementptr inbounds %{{.+}}, %{{.+}}* [[ARG:%.+]], i{{[0-9]+}} 0, i{{[0-9]+}} 0 - // LAMBDA-64: store double* [[G_CONV]], double** [[G_PRIVATE_ADDR_REF]], - // LAMBDA-32: store double* [[G_PRIV]], double** [[G_PRIVATE_ADDR_REF]], - // LAMBDA: [[G1_PRIVATE_ADDR_REF:%.+]] = getelementptr inbounds %{{.+}}, %{{.+}}* [[ARG:%.+]], i{{[0-9]+}} 0, i{{[0-9]+}} 1 - // LAMBDA: [[G1_REF_REF:%.+]] = load double*, double** [[G1_REF]], - // LAMBDA: store double* [[G1_REF_REF]], double** [[G1_PRIVATE_ADDR_REF]], - // LAMBDA: [[SVAR_PRIVATE_ADDR_REF:%.+]] = getelementptr inbounds %{{.+}}, %{{.+}}* [[ARG:%.+]], i{{[0-9]+}} 0, i{{[0-9]+}} 2 - // LAMBDA-64: store i{{[0-9]+}}* [[SVAR_CONV]], i{{[0-9]+}}** [[SVAR_PRIVATE_ADDR_REF]] - // LAMBDA-32: store i{{[0-9]+}}* [[SVAR_ADDR]], i{{[0-9]+}}** [[SVAR_PRIVATE_ADDR_REF]] - // LAMBDA: [[SFVAR_PRIVATE_ADDR_REF:%.+]] = getelementptr inbounds %{{.+}}, %{{.+}}* [[ARG:%.+]], i{{[0-9]+}} 0, i{{[0-9]+}} 3 - // LAMBDA: store float* [[SFVAR_CONV]], float** [[SFVAR_PRIVATE_ADDR_REF]] - // LAMBDA: call{{.*}} void [[INNER_LAMBDA:@.+]](%{{.+}}* [[ARG]]) - // LAMBDA: call {{.*}}void @__kmpc_for_static_fini( - // LAMBDA: ret void - [&]() { - // LAMBDA: define {{.+}} void [[INNER_LAMBDA]](%{{.+}}* [[ARG_PTR:%.+]]) - // LAMBDA: store %{{.+}}* [[ARG_PTR]], %{{.+}}** [[ARG_PTR_REF:%.+]], - g = 2; - g1 = 2; - svar = 4; - sfvar = 8.0; - // LAMBDA: [[ARG_PTR:%.+]] = load %{{.+}}*, %{{.+}}** [[ARG_PTR_REF]] - // LAMBDA: [[G_PTR_REF:%.+]] = getelementptr inbounds %{{.+}}, %{{.+}}* [[ARG_PTR]], i{{[0-9]+}} 0, i{{[0-9]+}} 0 - // LAMBDA: [[G_REF:%.+]] = load double*, double** [[G_PTR_REF]] - // LAMBDA: store double 2.0{{.+}}, double* [[G_REF]] - - // LAMBDA: [[TMP_PTR_REF:%.+]] = getelementptr inbounds %{{.+}}, %{{.+}}* [[ARG_PTR]], i{{[0-9]+}} 0, i{{[0-9]+}} 1 - // LAMBDA: [[G1_REF:%.+]] = load double*, double** [[TMP_PTR_REF]] - // LAMBDA: store double 2.0{{.+}}, double* [[G1_REF]], - // LAMBDA: [[SVAR_PTR_REF:%.+]] = getelementptr inbounds %{{.+}}, %{{.+}}* [[ARG_PTR]], i{{[0-9]+}} 0, i{{[0-9]+}} 2 - // LAMBDA: [[SVAR_REF:%.+]] = load i{{[0-9]+}}*, i{{[0-9]+}}** [[SVAR_PTR_REF]] - // LAMBDA: store i{{[0-9]+}} 4, i{{[0-9]+}}* [[SVAR_REF]] - // LAMBDA: [[SFVAR_PTR_REF:%.+]] = getelementptr inbounds %{{.+}}, %{{.+}}* [[ARG_PTR]], i{{[0-9]+}} 0, i{{[0-9]+}} 3 - // LAMBDA: [[SFVAR_REF:%.+]] = load float*, float** [[SFVAR_PTR_REF]] - // LAMBDA: store float 8.0{{.+}}, float* [[SFVAR_REF]] - }(); - } - }(); - return 0; - #else - S test; - int t_var = 0; - int vec[] = {1, 2}; - S s_arr[] = {1, 2}; - S &var = test; - - #pragma omp target - #pragma omp teams - #pragma omp distribute parallel for firstprivate(t_var, vec, s_arr, s_arr, var, var, svar) - for (int i = 0; i < 2; ++i) { - vec[i] = t_var; - s_arr[i] = var; - } - return tmain(); - #endif -} - -// CHECK-LABEL: define{{.*}} i{{[0-9]+}} @main() -// CHECK: [[TEST:%.+]] = alloca [[S_FLOAT_TY]], -// CHECK: call {{.*}} [[S_FLOAT_TY_DEF_CONSTR:@.+]]([[S_FLOAT_TY]]* [[TEST]]) -// CHECK: call i{{[0-9]+}} @__tgt_target_teams( -// CHECK: call void [[OFFLOAD_FUN_0:@.+]]( -// CHECK: call {{.*}} [[S_FLOAT_TY_DEF_DESTR:@.+]]([[S_FLOAT_TY]]* [[TEST]]) - -// CHECK: define{{.+}} [[OFFLOAD_FUN_0]](i{{[0-9]+}} [[T_VAR_IN:%.+]], [2 x i{{[0-9]+}}]* {{.+}} [[VEC_IN:%.+]], [2 x [[S_FLOAT_TY]]]* {{.+}} [[S_ARR_IN:%.+]], [[S_FLOAT_TY]]* {{.+}} [[VAR_IN:%.+]], i{{[0-9]+}} [[SVAR_IN:%.+]]) -// CHECK: call void (%{{.+}}*, i{{[0-9]+}}, void (i{{[0-9]+}}*, i{{[0-9]+}}*, ...)*, ...) @__kmpc_fork_teams(%{{.+}}* @{{.+}}, i{{[0-9]+}} 5, void (i{{[0-9]+}}*, i{{[0-9]+}}*, ...)* bitcast (void (i{{[0-9]+}}*, i{{[0-9]+}}*, i{{[0-9]+}}, [2 x i{{[0-9]+}}]*, [2 x [[S_FLOAT_TY]]]*, [[S_FLOAT_TY]]*, i{{[0-9]+}})* [[OMP_OUTLINED_0:@.+]] to void -// CHECK: ret - -// CHECK: define internal void [[OMP_OUTLINED_0]](i{{[0-9]+}}* noalias [[GTID_ADDR:%.+]], i{{[0-9]+}}* noalias %{{.+}}, i{{[0-9]+}} [[T_VAR_IN:%.+]], [2 x i{{[0-9]+}}]* {{.+}} [[VEC_IN:%.+]], [2 x [[S_FLOAT_TY]]]* {{.+}} [[S_ARR_IN:%.+]], [[S_FLOAT_TY]]* {{.+}} [[VAR_IN:%.+]], i{{[0-9]+}} [[SVAR_IN:%.+]]) - -// addr alloca's -// CHECK: [[T_VAR_ADDR:%.+]] = alloca i{{[0-9]+}}, -// CHECK: [[VEC_ADDR:%.+]] = alloca [2 x i{{[0-9]+}}]*, -// CHECK: [[S_ARR_ADDR:%.+]] = alloca [2 x [[S_FLOAT_TY]]]*, -// CHECK: [[VAR_ADDR:%.+]] = alloca [[S_FLOAT_TY]]*, -// CHECK: [[SVAR_ADDR:%.+]] = alloca i{{[0-9]+}}, -// CHECK: [[TMP:%.+]] = alloca [[S_FLOAT_TY]]*, - -// skip loop alloca's -// CHECK: [[OMP_IV:.omp.iv+]] = alloca i{{[0-9]+}}, -// CHECK: [[OMP_LB:.omp.comb.lb+]] = alloca i{{[0-9]+}}, -// CHECK: [[OMP_UB:.omp.comb.ub+]] = alloca i{{[0-9]+}}, -// CHECK: [[OMP_ST:.omp.stride+]] = alloca i{{[0-9]+}}, -// CHECK: [[OMP_IS_LAST:.omp.is_last+]] = alloca i{{[0-9]+}}, - -// private alloca's -// CHECK: [[T_VAR_PRIV:%.+]] = alloca i{{[0-9]+}}, -// CHECK: [[VEC_PRIV:%.+]] = alloca [2 x i{{[0-9]+}}], -// CHECK: [[S_ARR_PRIV:%.+]] = alloca [2 x [[S_FLOAT_TY]]], -// CHECK: [[VAR_PRIV:%.+]] = alloca [[S_FLOAT_TY]], -// CHECK: [[TMP_PRIV:%.+]] = alloca [[S_FLOAT_TY]]*, -// CHECK: [[SVAR_PRIV:%.+]] = alloca i{{[0-9]+}}, - -// CHECK: store i{{[0-9]+}}* [[GTID_ADDR]], i{{[0-9]+}}** [[GTID_ADDR_REF:%.+]] - -// init addr alloca's with input values -// CHECK-DAG: store {{.+}} [[T_VAR_IN]], {{.+}}* [[T_VAR_ADDR]], -// CHECK-DAG: store {{.+}} [[VEC_IN]], {{.+}} [[VEC_ADDR]], -// CHECK-DAG: store {{.+}} [[S_ARR_IN]], {{.+}} [[S_ARR_ADDR]], -// CHECK-DAG: store {{.+}} [[VAR_IN]], {{.+}} [[VAR_ADDR]], -// CHECK-DAG: store {{.+}} [[SVAR_IN]], {{.+}} [[SVAR_ADDR]], - -// init private alloca's with addr alloca's -// t-var -// CHECK-64-DAG: [[T_VAR_CONV:%.+]] = bitcast {{.+}} [[T_VAR_ADDR]] to -// CHECK-64-DAG: [[T_VAR_ADDR_VAL:%.+]] = load {{.+}}, {{.+}}* [[T_VAR_CONV]], -// CHECK-32-DAG: [[T_VAR_ADDR_VAL:%.+]] = load {{.+}}, {{.+}}* [[T_VAR_ADDR]], -// CHECK-DAG: store {{.+}} [[T_VAR_ADDR_VAL]], {{.+}} [[T_VAR_PRIV]], - -// vec -// CHECK-DAG: [[VEC_ADDR_VAL:%.+]] = load {{.+}}*, {{.+}}** [[VEC_ADDR]], -// CHECK-DAG: [[VEC_PRIV_BCAST:%.+]] = bitcast {{.+}} [[VEC_PRIV]] to -// CHECK-DAG: [[VEC_ADDR_BCAST:%.+]] = bitcast {{.+}} [[VEC_ADDR_VAL]] to -// CHECK-DAG: call void @llvm.memcpy{{.+}}({{.+}}* [[VEC_PRIV_BCAST]], {{.+}}* [[VEC_ADDR_BCAST]], - -// s_arr -// CHECK-DAG: [[S_ARR_ADDR_VAL:%.+]] = load {{.+}}*, {{.+}}** [[S_ARR_ADDR]], -// CHECK-DAG: [[S_ARR_BGN:%.+]] = getelementptr {{.+}}, {{.+}}* [[S_ARR_PRIV]], -// CHECK-DAG: [[S_ARR_ADDR_BCAST:%.+]] = bitcast {{.+}}* [[S_ARR_ADDR_VAL]] to -// CHECK-DAG: [[S_ARR_BGN_GEP:%.+]] = getelementptr {{.+}}, {{.+}}* [[S_ARR_BGN]], -// CHECK-DAG: [[S_ARR_EMPTY:%.+]] = icmp {{.+}} [[S_ARR_BGN]], [[S_ARR_BGN_GEP]] -// CHECK-DAG: br {{.+}} [[S_ARR_EMPTY]], label %[[CPY_DONE:.+]], label %[[CPY_BODY:.+]] -// CHECK-DAG: [[CPY_BODY]]: -// CHECK-DAG: call void @llvm.memcpy{{.+}}( -// CHECK-DAG: [[CPY_DONE]]: - -// var -// CHECK-DAG: [[TMP_REF:%.+]] = load {{.+}}*, {{.+}}* [[TMP]], -// CHECK-DAG: [[VAR_PRIV_BCAST:%.+]] = bitcast {{.+}}* [[VAR_PRIV]] to -// CHECK-DAG: [[TMP_REF_BCAST:%.+]] = bitcast {{.+}}* [[TMP_REF]] to -// CHECK-DAG: call void @llvm.memcpy.{{.+}}({{.+}}* [[VAR_PRIV_BCAST]], {{.+}}* [[TMP_REF_BCAST]], -// CHECK-DAG: store {{.+}}* [[VAR_PRIV]], {{.+}}** [[TMP_PRIV]], - -// svar -// CHECK-64-DAG: [[SVAR_CONV:%.+]] = bitcast {{.+}}* [[SVAR_ADDR]] to -// CHECK-64-DAG: [[SVAR_CONV_VAL:%.+]] = load {{.+}}, {{.+}}* [[SVAR_CONV]], -// CHECK-32-DAG: [[SVAR_CONV_VAL:%.+]] = load {{.+}}, {{.+}}* [[SVAR_ADDR]], -// CHECK-DAG: store {{.+}} [[SVAR_CONV_VAL]], {{.+}}* [[SVAR_PRIV]], - -// CHECK: call void @__kmpc_for_static_init_4( -// pass private alloca's to fork -// CHECK-DAG: [[T_VAR_PRIV_VAL:%.+]] = load {{.+}}, {{.+}}* [[T_VAR_PRIV]], -// not dag to distinguish with S_VAR_CAST -// CHECK-64: [[T_VAR_CAST_CONV:%.+]] = bitcast {{.+}}* [[T_VAR_CAST:%.+]] to -// CHECK-64-DAG: store {{.+}} [[T_VAR_PRIV_VAL]], {{.+}} [[T_VAR_CAST_CONV]], -// CHECK-32: store {{.+}} [[T_VAR_PRIV_VAL]], {{.+}} [[T_VAR_CAST:%.+]], -// CHECK-DAG: [[T_VAR_CAST_VAL:%.+]] = load {{.+}}, {{.+}}* [[T_VAR_CAST]], -// CHECK-DAG: [[TMP_PRIV_VAL:%.+]] = load [[S_FLOAT_TY]]*, [[S_FLOAT_TY]]** [[TMP_PRIV]], -// CHECK-DAG: [[SVAR_PRIV_VAL:%.+]] = load {{.+}}, {{.+}}* [[SVAR_PRIV]], -// CHECK-64-DAG: [[SVAR_CAST_CONV:%.+]] = bitcast {{.+}}* [[SVAR_CAST:%.+]] to -// CHECK-64-DAG: store {{.+}} [[SVAR_PRIV_VAL]], {{.+}}* [[SVAR_CAST_CONV]], -// CHECK-32-DAG: store {{.+}} [[SVAR_PRIV_VAL]], {{.+}}* [[SVAR_CAST:%.+]], -// CHECK-DAG: [[SVAR_CAST_VAL:%.+]] = load {{.+}}, {{.+}}* [[SVAR_CAST]], -// CHECK: call{{.+}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}}[[OMP_PARFOR_OUTLINED_0:@.+]] to void ({{.+}})*), {{.+}}, {{.+}}, [2 x i{{[0-9]+}}]* [[VEC_PRIV]], i{{[0-9]+}} [[T_VAR_CAST_VAL]], [2 x [[S_FLOAT_TY]]]* [[S_ARR_PRIV]], [[S_FLOAT_TY]]* [[TMP_PRIV_VAL]], i{{[0-9]+}} [[SVAR_CAST_VAL]]) -// CHECK: call void @__kmpc_for_static_fini( - -// call destructors: var.. -// CHECK-DAG: call {{.+}} [[S_FLOAT_TY_DEF_DESTR]]([[S_FLOAT_TY]]* [[VAR_PRIV]]) - -// ..and s_arr -// CHECK: {{.+}}: -// CHECK: [[S_ARR_EL_PAST:%.+]] = phi [[S_FLOAT_TY]]* -// CHECK: [[S_ARR_PRIV_ITEM:%.+]] = getelementptr {{.+}}, {{.+}} [[S_ARR_EL_PAST]], -// CHECK: call {{.*}} [[S_FLOAT_TY_DEF_DESTR]]([[S_FLOAT_TY]]* [[S_ARR_PRIV_ITEM]]) - -// CHECK: ret void - -// By OpenMP specifications, 'firstprivate' applies to both distribute and parallel for. -// However, the support for 'firstprivate' of 'parallel' is only used when 'parallel' -// is found alone. Therefore we only have one 'firstprivate' support for 'parallel for' -// in combination -// CHECK: define internal void [[OMP_PARFOR_OUTLINED_0]]({{.+}}, {{.+}}, {{.+}}, {{.+}}, [2 x i{{[0-9]+}}]* {{.+}} [[VEC_IN:%.+]], i{{[0-9]+}} [[T_VAR_IN:%.+]], [2 x [[S_FLOAT_TY]]]* {{.+}} [[S_ARR_IN:%.+]], [[S_FLOAT_TY]]* {{.+}} [[VAR_IN:%.+]], i{{[0-9]+}} [[SVAR_IN:%.+]]) - -// addr alloca's -// CHECK: [[VEC_ADDR:%.+]] = alloca [2 x i{{[0-9]+}}]*, -// CHECK: [[T_VAR_ADDR:%.+]] = alloca i{{[0-9]+}}, -// CHECK: [[S_ARR_ADDR:%.+]] = alloca [2 x [[S_FLOAT_TY]]]*, -// CHECK: [[VAR_ADDR:%.+]] = alloca [[S_FLOAT_TY]]*, -// CHECK: [[SVAR_ADDR:%.+]] = alloca i{{[0-9]+}}, - -// skip loop alloca's -// CHECK: [[OMP_IV:.omp.iv+]] = alloca i{{[0-9]+}}, -// CHECK: [[OMP_LB:.omp.lb+]] = alloca i{{[0-9]+}}, -// CHECK: [[OMP_UB:.omp.ub+]] = alloca i{{[0-9]+}}, -// CHECK: [[OMP_ST:.omp.stride+]] = alloca i{{[0-9]+}}, -// CHECK: [[OMP_IS_LAST:.omp.is_last+]] = alloca i{{[0-9]+}}, - -// private alloca's -// CHECK: [[VEC_PRIV:%.+]] = alloca [2 x i{{[0-9]+}}], -// CHECK: [[S_ARR_PRIV:%.+]] = alloca [2 x [[S_FLOAT_TY]]], -// CHECK: [[VAR_PRIV:%.+]] = alloca [[S_FLOAT_TY]], -// CHECK: [[TMP_PRIV:%.+]] = alloca [[S_FLOAT_TY]]*, - -// CHECK: store i{{[0-9]+}}* [[GTID_ADDR]], i{{[0-9]+}}** [[GTID_ADDR_REF:%.+]] - -// init addr alloca's with input values -// CHECK-DAG: store {{.+}} [[VEC_IN]], {{.+}} [[VEC_ADDR]], -// CHECK-DAG: store {{.+}} [[T_VAR_IN]], {{.+}}* [[T_VAR_ADDR]], -// CHECK-DAG: store {{.+}} [[S_ARR_IN]], {{.+}} [[S_ARR_ADDR]], -// CHECK-DAG: store {{.+}} [[VAR_IN]], {{.+}} [[VAR_ADDR]], -// CHECK-DAG: store {{.+}} [[SVAR_IN]], {{.+}} [[SVAR_ADDR]], - -// init private alloca's with addr alloca's -// vec -// CHECK-DAG: [[VEC_ADDR_VAL:%.+]] = load {{.+}}*, {{.+}}** [[VEC_ADDR]], -// CHECK-DAG: [[VEC_PRIV_BCAST:%.+]] = bitcast {{.+}} [[VEC_PRIV]] to -// CHECK-DAG: [[VEC_ADDR_BCAST:%.+]] = bitcast {{.+}} [[VEC_ADDR_VAL]] to -// CHECK-DAG: call void @llvm.memcpy{{.+}}({{.+}}* [[VEC_PRIV_BCAST]], {{.+}}* [[VEC_ADDR_BCAST]], - -// s_arr -// CHECK-DAG: [[S_ARR_ADDR_VAL:%.+]] = load {{.+}}*, {{.+}}** [[S_ARR_ADDR]], -// CHECK-DAG: [[S_ARR_BGN:%.+]] = getelementptr {{.+}}, {{.+}}* [[S_ARR_PRIV]], -// CHECK-DAG: [[S_ARR_ADDR_BCAST:%.+]] = bitcast {{.+}}* [[S_ARR_ADDR_VAL]] to -// CHECK-DAG: [[S_ARR_BGN_GEP:%.+]] = getelementptr {{.+}}, {{.+}}* [[S_ARR_BGN]], -// CHECK-DAG: [[S_ARR_EMPTY:%.+]] = icmp {{.+}} [[S_ARR_BGN]], [[S_ARR_BGN_GEP]] -// CHECK-DAG: br {{.+}} [[S_ARR_EMPTY]], label %[[CPY_DONE:.+]], label %[[CPY_BODY:.+]] -// CHECK-DAG: [[CPY_BODY]]: -// CHECK-DAG: call void @llvm.memcpy{{.+}}( -// CHECK-DAG: [[CPY_DONE]]: - -// var -// CHECK-DAG: [[VAR_ADDR_REF:%.+]] = load {{.+}}*, {{.+}}* [[VAR_ADDR]], -// CHECK-DAG: [[VAR_PRIV_BCAST:%.+]] = bitcast {{.+}}* [[VAR_PRIV]] to -// CHECK-DAG: [[VAR_ADDR_BCAST:%.+]] = bitcast {{.+}}* [[VAR_ADDR_REF]] to -// CHECK-DAG: call void @llvm.memcpy.{{.+}}({{.+}}* [[VAR_PRIV_BCAST]], {{.+}}* [[VAR_ADDR_BCAST]], -// CHECK-DAG: store {{.+}}* [[VAR_PRIV]], {{.+}}** [[TMP_PRIV]], - -// CHECK: call void @__kmpc_for_static_init_4( -// CHECK: call void @__kmpc_for_static_fini( - -// call destructors: var.. -// CHECK-DAG: call {{.+}} [[S_FLOAT_TY_DEF_DESTR]]([[S_FLOAT_TY]]* [[VAR_PRIV]]) - -// ..and s_arr -// CHECK: {{.+}}: -// CHECK: [[S_ARR_EL_PAST:%.+]] = phi [[S_FLOAT_TY]]* -// CHECK: [[S_ARR_PRIV_ITEM:%.+]] = getelementptr {{.+}}, {{.+}} [[S_ARR_EL_PAST]], -// CHECK: call {{.*}} [[S_FLOAT_TY_DEF_DESTR]]([[S_FLOAT_TY]]* [[S_ARR_PRIV_ITEM]]) - -// CHECK: ret void - -// template tmain with S_INT_TY -// CHECK-LABEL: define{{.*}} i{{[0-9]+}} @{{.+}}tmain{{.+}}() -// CHECK: [[TEST:%.+]] = alloca [[S_INT_TY]], -// CHECK: call {{.*}} [[S_INT_TY_DEF_CONSTR:@.+]]([[S_INT_TY]]* [[TEST]]) -// CHECK: call i{{[0-9]+}} @__tgt_target_teams( -// CHECK: call void [[OFFLOAD_FUN_0:@.+]]( -// CHECK: call {{.*}} [[S_INT_TY_DEF_DESTR:@.+]]([[S_INT_TY]]* [[TEST]]) - -// CHECK: define{{.+}} [[OFFLOAD_FUN_0]](i{{[0-9]+}} [[T_VAR_IN:%.+]], [2 x i{{[0-9]+}}]* {{.+}} [[VEC_IN:%.+]], [2 x [[S_INT_TY]]]* {{.+}} [[S_ARR_IN:%.+]], [[S_INT_TY]]* {{.+}} [[VAR_IN:%.+]]) -// CHECK: call void (%{{.+}}*, i{{[0-9]+}}, void (i{{[0-9]+}}*, i{{[0-9]+}}*, ...)*, ...) @__kmpc_fork_teams(%{{.+}}* @{{.+}}, i{{[0-9]+}} 4, void (i{{[0-9]+}}*, i{{[0-9]+}}*, ...)* bitcast (void (i{{[0-9]+}}*, i{{[0-9]+}}*, i{{[0-9]+}}, [2 x i{{[0-9]+}}]*, [2 x [[S_INT_TY]]]*, [[S_INT_TY]]*)* [[OMP_OUTLINED_0:@.+]] to void -// CHECK: ret - -// CHECK: define internal void [[OMP_OUTLINED_0]](i{{[0-9]+}}* noalias [[GTID_ADDR:%.+]], i{{[0-9]+}}* noalias %{{.+}}, i{{[0-9]+}} [[T_VAR_IN:%.+]], [2 x i{{[0-9]+}}]* {{.+}} [[VEC_IN:%.+]], [2 x [[S_INT_TY]]]* {{.+}} [[S_ARR_IN:%.+]], [[S_INT_TY]]* {{.+}} [[VAR_IN:%.+]]) - -// addr alloca's -// CHECK: [[T_VAR_ADDR:%.+]] = alloca i{{[0-9]+}}, -// CHECK: [[VEC_ADDR:%.+]] = alloca [2 x i{{[0-9]+}}]*, -// CHECK: [[S_ARR_ADDR:%.+]] = alloca [2 x [[S_INT_TY]]]*, -// CHECK: [[VAR_ADDR:%.+]] = alloca [[S_INT_TY]]*, -// CHECK: [[TMP:%.+]] = alloca [[S_INT_TY]]*, - -// skip loop alloca's -// CHECK: [[OMP_IV:.omp.iv+]] = alloca i{{[0-9]+}}, -// CHECK: [[OMP_LB:.omp.comb.lb+]] = alloca i{{[0-9]+}}, -// CHECK: [[OMP_UB:.omp.comb.ub+]] = alloca i{{[0-9]+}}, -// CHECK: [[OMP_ST:.omp.stride+]] = alloca i{{[0-9]+}}, -// CHECK: [[OMP_IS_LAST:.omp.is_last+]] = alloca i{{[0-9]+}}, - -// private alloca's -// CHECK: [[T_VAR_PRIV:%.+]] = alloca i{{[0-9]+}}, -// CHECK: [[VEC_PRIV:%.+]] = alloca [2 x i{{[0-9]+}}], -// CHECK: [[S_ARR_PRIV:%.+]] = alloca [2 x [[S_INT_TY]]], -// CHECK: [[VAR_PRIV:%.+]] = alloca [[S_INT_TY]], -// CHECK: [[TMP_PRIV:%.+]] = alloca [[S_INT_TY]]*, - -// CHECK: store i{{[0-9]+}}* [[GTID_ADDR]], i{{[0-9]+}}** [[GTID_ADDR_REF:%.+]] - -// init addr alloca's with input values -// CHECK-DAG: store {{.+}} [[T_VAR_IN]], {{.+}}* [[T_VAR_ADDR]], -// CHECK-DAG: store {{.+}} [[VEC_IN]], {{.+}} [[VEC_ADDR]], -// CHECK-DAG: store {{.+}} [[S_ARR_IN]], {{.+}} [[S_ARR_ADDR]], -// CHECK-DAG: store {{.+}} [[VAR_IN]], {{.+}} [[VAR_ADDR]], - -// init private alloca's with addr alloca's -// t-var -// CHECK-64-DAG: [[T_VAR_CONV:%.+]] = bitcast {{.+}} [[T_VAR_ADDR]] to -// CHECK-64-DAG: [[T_VAR_ADDR_VAL:%.+]] = load {{.+}}, {{.+}}* [[T_VAR_CONV]], -// CHECK-32-DAG: [[T_VAR_ADDR_VAL:%.+]] = load {{.+}}, {{.+}}* [[T_VAR_ADDR]], -// CHECK-DAG: store {{.+}} [[T_VAR_ADDR_VAL]], {{.+}} [[T_VAR_PRIV]], - -// vec -// CHECK-DAG: [[VEC_ADDR_VAL:%.+]] = load {{.+}}*, {{.+}}** [[VEC_ADDR]], -// CHECK-DAG: [[VEC_PRIV_BCAST:%.+]] = bitcast {{.+}} [[VEC_PRIV]] to -// CHECK-DAG: [[VEC_ADDR_BCAST:%.+]] = bitcast {{.+}} [[VEC_ADDR_VAL]] to -// CHECK-DAG: call void @llvm.memcpy{{.+}}({{.+}}* [[VEC_PRIV_BCAST]], {{.+}}* [[VEC_ADDR_BCAST]], - -// s_arr -// CHECK-DAG: [[S_ARR_ADDR_VAL:%.+]] = load {{.+}}*, {{.+}}** [[S_ARR_ADDR]], -// CHECK-DAG: [[S_ARR_BGN:%.+]] = getelementptr {{.+}}, {{.+}}* [[S_ARR_PRIV]], -// CHECK-DAG: [[S_ARR_ADDR_BCAST:%.+]] = bitcast {{.+}}* [[S_ARR_ADDR_VAL]] to -// CHECK-DAG: [[S_ARR_BGN_GEP:%.+]] = getelementptr {{.+}}, {{.+}}* [[S_ARR_BGN]], -// CHECK-DAG: [[S_ARR_EMPTY:%.+]] = icmp {{.+}} [[S_ARR_BGN]], [[S_ARR_BGN_GEP]] -// CHECK-DAG: br {{.+}} [[S_ARR_EMPTY]], label %[[CPY_DONE:.+]], label %[[CPY_BODY:.+]] -// CHECK-DAG: [[CPY_BODY]]: -// CHECK-DAG: call void @llvm.memcpy{{.+}}( -// CHECK-DAG: [[CPY_DONE]]: - -// var -// CHECK-DAG: [[TMP_REF:%.+]] = load {{.+}}*, {{.+}}* [[TMP]], -// CHECK-DAG: [[VAR_PRIV_BCAST:%.+]] = bitcast {{.+}}* [[VAR_PRIV]] to -// CHECK-DAG: [[TMP_REF_BCAST:%.+]] = bitcast {{.+}}* [[TMP_REF]] to -// CHECK-DAG: call void @llvm.memcpy.{{.+}}({{.+}}* [[VAR_PRIV_BCAST]], {{.+}}* [[TMP_REF_BCAST]], -// CHECK-DAG: store {{.+}}* [[VAR_PRIV]], {{.+}}** [[TMP_PRIV]], - -// CHECK: call void @__kmpc_for_static_init_4( -// pass private alloca's to fork -// CHECK-DAG: [[T_VAR_PRIV_VAL:%.+]] = load {{.+}}, {{.+}}* [[T_VAR_PRIV]], -// not dag to distinguish with S_VAR_CAST -// CHECK-64: [[T_VAR_CAST_CONV:%.+]] = bitcast {{.+}}* [[T_VAR_CAST:%.+]] to -// CHECK-64-DAG: store {{.+}} [[T_VAR_PRIV_VAL]], {{.+}} [[T_VAR_CAST_CONV]], -// CHECK-32: store {{.+}} [[T_VAR_PRIV_VAL]], {{.+}} [[T_VAR_CAST:%.+]], -// CHECK-DAG: [[T_VAR_CAST_VAL:%.+]] = load {{.+}}, {{.+}}* [[T_VAR_CAST]], -// CHECK-DAG: [[TMP_PRIV_VAL:%.+]] = load [[S_INT_TY]]*, [[S_INT_TY]]** [[TMP_PRIV]], -// CHECK: call{{.+}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}}[[OMP_PARFOR_OUTLINED_0:@.+]] to void ({{.+}})*), {{.+}}, {{.+}}, [2 x i{{[0-9]+}}]* [[VEC_PRIV]], i{{[0-9]+}} [[T_VAR_CAST_VAL]], [2 x [[S_INT_TY]]]* [[S_ARR_PRIV]], [[S_INT_TY]]* [[TMP_PRIV_VAL]]) -// CHECK: call void @__kmpc_for_static_fini( - -// call destructors: var.. -// CHECK-DAG: call {{.+}} [[S_INT_TY_DEF_DESTR]]([[S_INT_TY]]* [[VAR_PRIV]]) - -// ..and s_arr -// CHECK: {{.+}}: -// CHECK: [[S_ARR_EL_PAST:%.+]] = phi [[S_INT_TY]]* -// CHECK: [[S_ARR_PRIV_ITEM:%.+]] = getelementptr {{.+}}, {{.+}} [[S_ARR_EL_PAST]], -// CHECK: call {{.*}} [[S_INT_TY_DEF_DESTR]]([[S_INT_TY]]* [[S_ARR_PRIV_ITEM]]) - -// CHECK: ret void - -// By OpenMP specifications, 'firstprivate' applies to both distribute and parallel for. -// However, the support for 'firstprivate' of 'parallel' is only used when 'parallel' -// is found alone. Therefore we only have one 'firstprivate' support for 'parallel for' -// in combination -// CHECK: define internal void [[OMP_PARFOR_OUTLINED_0]]({{.+}}, {{.+}}, {{.+}}, {{.+}}, [2 x i{{[0-9]+}}]* {{.+}} [[VEC_IN:%.+]], i{{[0-9]+}} [[T_VAR_IN:%.+]], [2 x [[S_INT_TY]]]* {{.+}} [[S_ARR_IN:%.+]], [[S_INT_TY]]* {{.+}} [[VAR_IN:%.+]]) - -// addr alloca's -// CHECK: [[VEC_ADDR:%.+]] = alloca [2 x i{{[0-9]+}}]*, -// CHECK: [[T_VAR_ADDR:%.+]] = alloca i{{[0-9]+}}, -// CHECK: [[S_ARR_ADDR:%.+]] = alloca [2 x [[S_INT_TY]]]*, -// CHECK: [[VAR_ADDR:%.+]] = alloca [[S_INT_TY]]*, - -// skip loop alloca's -// CHECK: [[OMP_IV:.omp.iv+]] = alloca i{{[0-9]+}}, -// CHECK: [[OMP_LB:.omp.lb+]] = alloca i{{[0-9]+}}, -// CHECK: [[OMP_UB:.omp.ub+]] = alloca i{{[0-9]+}}, -// CHECK: [[OMP_ST:.omp.stride+]] = alloca i{{[0-9]+}}, -// CHECK: [[OMP_IS_LAST:.omp.is_last+]] = alloca i{{[0-9]+}}, - -// private alloca's -// CHECK: [[VEC_PRIV:%.+]] = alloca [2 x i{{[0-9]+}}], -// CHECK: [[S_ARR_PRIV:%.+]] = alloca [2 x [[S_INT_TY]]], -// CHECK: [[VAR_PRIV:%.+]] = alloca [[S_INT_TY]], -// CHECK: [[TMP_PRIV:%.+]] = alloca [[S_INT_TY]]*, - -// CHECK: store i{{[0-9]+}}* [[GTID_ADDR]], i{{[0-9]+}}** [[GTID_ADDR_REF:%.+]] - -// init addr alloca's with input values -// CHECK-DAG: store {{.+}} [[VEC_IN]], {{.+}} [[VEC_ADDR]], -// CHECK-DAG: store {{.+}} [[T_VAR_IN]], {{.+}}* [[T_VAR_ADDR]], -// CHECK-DAG: store {{.+}} [[S_ARR_IN]], {{.+}} [[S_ARR_ADDR]], -// CHECK-DAG: store {{.+}} [[VAR_IN]], {{.+}} [[VAR_ADDR]], - -// init private alloca's with addr alloca's -// vec -// CHECK-DAG: [[VEC_ADDR_VAL:%.+]] = load {{.+}}*, {{.+}}** [[VEC_ADDR]], -// CHECK-DAG: [[VEC_PRIV_BCAST:%.+]] = bitcast {{.+}} [[VEC_PRIV]] to -// CHECK-DAG: [[VEC_ADDR_BCAST:%.+]] = bitcast {{.+}} [[VEC_ADDR_VAL]] to -// CHECK-DAG: call void @llvm.memcpy{{.+}}({{.+}}* [[VEC_PRIV_BCAST]], {{.+}}* [[VEC_ADDR_BCAST]], - -// s_arr -// CHECK-DAG: [[S_ARR_ADDR_VAL:%.+]] = load {{.+}}*, {{.+}}** [[S_ARR_ADDR]], -// CHECK-DAG: [[S_ARR_BGN:%.+]] = getelementptr {{.+}}, {{.+}}* [[S_ARR_PRIV]], -// CHECK-DAG: [[S_ARR_ADDR_BCAST:%.+]] = bitcast {{.+}}* [[S_ARR_ADDR_VAL]] to -// CHECK-DAG: [[S_ARR_BGN_GEP:%.+]] = getelementptr {{.+}}, {{.+}}* [[S_ARR_BGN]], -// CHECK-DAG: [[S_ARR_EMPTY:%.+]] = icmp {{.+}} [[S_ARR_BGN]], [[S_ARR_BGN_GEP]] -// CHECK-DAG: br {{.+}} [[S_ARR_EMPTY]], label %[[CPY_DONE:.+]], label %[[CPY_BODY:.+]] -// CHECK-DAG: [[CPY_BODY]]: -// CHECK-DAG: call void @llvm.memcpy{{.+}}( -// CHECK-DAG: [[CPY_DONE]]: - -// var -// CHECK-DAG: [[VAR_ADDR_REF:%.+]] = load {{.+}}*, {{.+}}* [[VAR_ADDR]], -// CHECK-DAG: [[VAR_PRIV_BCAST:%.+]] = bitcast {{.+}}* [[VAR_PRIV]] to -// CHECK-DAG: [[VAR_ADDR_BCAST:%.+]] = bitcast {{.+}}* [[VAR_ADDR_REF]] to -// CHECK-DAG: call void @llvm.memcpy.{{.+}}({{.+}}* [[VAR_PRIV_BCAST]], {{.+}}* [[VAR_ADDR_BCAST]], -// CHECK-DAG: store {{.+}}* [[VAR_PRIV]], {{.+}}** [[TMP_PRIV]], - -// CHECK: call void @__kmpc_for_static_init_4( -// CHECK: call void @__kmpc_for_static_fini( - -// call destructors: var.. -// CHECK-DAG: call {{.+}} [[S_INT_TY_DEF_DESTR]]([[S_INT_TY]]* [[VAR_PRIV]]) - -// ..and s_arr -// CHECK: {{.+}}: -// CHECK: [[S_ARR_EL_PAST:%.+]] = phi [[S_INT_TY]]* -// CHECK: [[S_ARR_PRIV_ITEM:%.+]] = getelementptr {{.+}}, {{.+}} [[S_ARR_EL_PAST]], -// CHECK: call {{.*}} [[S_INT_TY_DEF_DESTR]]([[S_INT_TY]]* [[S_ARR_PRIV_ITEM]]) - -// CHECK: ret void - -#endif diff --git a/test/OpenMP/distribute_parallel_for_if_codegen.cpp b/test/OpenMP/distribute_parallel_for_if_codegen.cpp deleted file mode 100644 index e6cd7215f5..0000000000 --- a/test/OpenMP/distribute_parallel_for_if_codegen.cpp +++ /dev/null @@ -1,192 +0,0 @@ -// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -triple %itanium_abi_triple -emit-llvm %s -o - | FileCheck %s -// RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -std=c++11 -triple %itanium_abi_triple -emit-pch -o %t %s -// RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -triple %itanium_abi_triple -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix=CHECK %s -// expected-no-diagnostics -#ifndef HEADER -#define HEADER - -void fn1(); -void fn2(); -void fn3(); -void fn4(); -void fn5(); -void fn6(); - -int Arg; - -// CHECK-LABEL: define {{.*}}void @{{.+}}gtid_test -void gtid_test() { -#pragma omp target -#pragma omp teams -// CHECK: call i{{[0-9]+}} @__tgt_target_teams( -// CHECK: call void [[OFFLOADING_FUN_0:@.+]]( -// CHECK: call i{{[0-9]+}} @__tgt_target_teams( -// CHECK: call void [[OFFLOADING_FUN_1:@.+]]( -#pragma omp distribute parallel for - for(int i = 0 ; i < 100; i++) {} - // CHECK: define internal void [[OFFLOADING_FUN_0]]( - // CHECK: call {{.*}}void {{.+}} @__kmpc_fork_teams({{.+}}, i32 0, {{.+}}* [[OMP_TEAMS_OUTLINED_0:@.+]] to {{.+}}) - // CHECK: define{{.+}} void [[OMP_TEAMS_OUTLINED_0]]( - // CHECK: call void @__kmpc_for_static_init_4( - // CHECK: call void {{.+}} @__kmpc_fork_call(%{{.+}}* @{{.+}}, i{{.+}} 2, {{.+}}* [[OMP_OUTLINED_0:@.+]] to void - // CHECK: call void @__kmpc_for_static_fini( - - // CHECK: define{{.+}} void [[OMP_OUTLINED_0]]( - // CHECK: call void @__kmpc_for_static_init_4( - // CHECK: call void @__kmpc_for_static_fini( - // CHECK: ret -#pragma omp target -#pragma omp teams -#pragma omp distribute parallel for if (parallel: false) - for(int i = 0 ; i < 100; i++) { - // CHECK: define internal void [[OFFLOADING_FUN_1]]( - // CHECK: call {{.*}}void {{.+}} @__kmpc_fork_teams({{.+}}, i32 0, {{.+}}* [[OMP_TEAMS_OUTLINED_1:@.+]] to {{.+}}) - // CHECK: define{{.+}} void [[OMP_TEAMS_OUTLINED_1]]( - // CHECK: call void @__kmpc_for_static_init_4( - // CHECK: call void @__kmpc_serialized_parallel( - // CHECK: call void [[OMP_OUTLINED_1:@.+]]( - // CHECK: call void @__kmpc_end_serialized_parallel( - // CHECK: call void @__kmpc_for_static_fini( - // CHECK: define{{.+}} void [[OMP_OUTLINED_1]]( - // CHECK: call void @__kmpc_for_static_init_4( - // CHECK: call void @{{.+}}gtid_test - // CHECK: call void @__kmpc_for_static_fini( - // CHECK: ret - gtid_test(); - } -} - - -template -int tmain(T Arg) { -#pragma omp target -#pragma omp teams -#pragma omp distribute parallel for if (true) - for(int i = 0 ; i < 100; i++) { - fn1(); - } -#pragma omp target -#pragma omp teams -#pragma omp distribute parallel for if (false) - for(int i = 0 ; i < 100; i++) { - fn2(); - } -#pragma omp target -#pragma omp teams -#pragma omp distribute parallel for if (parallel: Arg) - for(int i = 0 ; i < 100; i++) { - fn3(); - } - return 0; -} - -// CHECK-LABEL: define {{.*}}i{{[0-9]+}} @main() -int main() { -// CHECK: call i{{[0-9]+}} @__tgt_target_teams( -// CHECK: call void [[OFFLOADING_FUN_0:@.+]]( -// CHECK: call i{{[0-9]+}} @__tgt_target_teams( -// CHECK: call void [[OFFLOADING_FUN_1:@.+]]( -// CHECK: call i{{[0-9]+}} @__tgt_target_teams( -// CHECK: call void [[OFFLOADING_FUN_2:@.+]]( -// CHECK: = call {{.*}}i{{.+}} @{{.+}}tmain -#pragma omp target -#pragma omp teams -#pragma omp distribute parallel for if (true) - for(int i = 0 ; i < 100; i++) { - // CHECK: define internal void [[OFFLOADING_FUN_0]]( - // CHECK: call {{.*}}void {{.+}} @__kmpc_fork_teams({{.+}}, i32 0, {{.+}}* [[OMP_TEAMS_OUTLINED_0:@.+]] to {{.+}}) - // CHECK: define{{.+}} void [[OMP_TEAMS_OUTLINED_0]]( - - // CHECK: call void @__kmpc_for_static_init_4( - // CHECK: call void {{.+}} @__kmpc_fork_call(%{{.+}}* @{{.+}}, i{{.+}} 2, {{.+}}* [[OMP_OUTLINED_2:@.+]] to void - // CHECK: call void @__kmpc_for_static_fini( - // CHECK: define{{.+}} void [[OMP_OUTLINED_2]]( - // CHECK: call void @__kmpc_for_static_init_4( - // CHECK: call {{.*}}void @{{.+}}fn4 - // CHECK: call void @__kmpc_for_static_fini( - - fn4(); - } - -#pragma omp target -#pragma omp teams -#pragma omp distribute parallel for if (false) - for(int i = 0 ; i < 100; i++) { - // CHECK: define internal void [[OFFLOADING_FUN_1]]( - // CHECK: call {{.*}}void {{.+}} @__kmpc_fork_teams({{.+}}, i32 0, {{.+}}* [[OMP_TEAMS_OUTLINED_1:@.+]] to {{.+}}) - // CHECK: define{{.+}} void [[OMP_TEAMS_OUTLINED_1]]( - - // CHECK: call void @__kmpc_for_static_init_4( - // CHECK: call void @__kmpc_serialized_parallel( - // CHECK: call void [[OMP_OUTLINED_3:@.+]]( - // CHECK: call void @__kmpc_end_serialized_parallel( - // CHECK: call void @__kmpc_for_static_fini( - - // CHECK: define{{.+}} void [[OMP_OUTLINED_3]]( - // CHECK: call void @__kmpc_for_static_init_4( - // CHECK: call {{.*}}void @{{.+}}fn5 - // CHECK: call void @__kmpc_for_static_fini( - fn5(); - } - -#pragma omp target -#pragma omp teams -#pragma omp distribute parallel for if (Arg) - for(int i = 0 ; i < 100; i++) { - // CHECK: define internal void [[OFFLOADING_FUN_2]]( - // CHECK: call {{.*}}void {{.+}} @__kmpc_fork_teams({{.+}}, i32 1, {{.+}}* [[OMP_TEAMS_OUTLINED_2:@.+]] to {{.+}}) - // CHECK: define{{.+}} void [[OMP_TEAMS_OUTLINED_2]]( - - // CHECK: call void @__kmpc_for_static_init_4( - // CHECK: call void {{.+}} @__kmpc_fork_call(%{{.+}}* @{{.+}}, i{{.+}} 2, {{.+}}* [[OMP_OUTLINED_4:@.+]] to void - // CHECK: call void @__kmpc_serialized_parallel( - // CHECK: call void [[OMP_OUTLINED_4:@.+]]( - // CHECK: call void @__kmpc_end_serialized_parallel( - // CHECK: call void @__kmpc_for_static_fini( - - // CHECK: define{{.+}} void [[OMP_OUTLINED_4]]( - // CHECK: call void @__kmpc_for_static_init_4( - // CHECK: call {{.*}}void @{{.+}}fn6 - // CHECK: call void @__kmpc_for_static_fini( - fn6(); - } - - return tmain(Arg); -} - -// CHECK-LABEL: define {{.+}} @{{.+}}tmain - -// CHECK: call void @__kmpc_for_static_init_4( -// CHECK: call {{.*}}void {{.+}} @__kmpc_fork_call(%{{.+}}* @{{.+}}, i{{.+}} 2, void {{.+}}* [[T_OUTLINE_FUN_1:@.+]] to void -// CHECK: call void @__kmpc_for_static_fini( - -// CHECK: define internal {{.*}}void [[T_OUTLINE_FUN_1]] -// CHECK: call void @__kmpc_for_static_init_4( -// CHECK: call {{.*}}void @{{.+}}fn1 -// CHECK: call void @__kmpc_for_static_fini( -// CHECK: ret void - -// CHECK: call void @__kmpc_for_static_init_4( -// CHECK: call {{.*}}void @__kmpc_serialized_parallel( -// CHECK: call void [[T_OUTLINE_FUN_2:@.+]]( -// CHECK: call {{.*}}void @__kmpc_end_serialized_parallel( -// CHECK: call void @__kmpc_for_static_fini( - -// CHECK: define internal {{.*}}void [[T_OUTLINE_FUN_2]] -// CHECK: call void @__kmpc_for_static_init_4( -// CHECK: call {{.*}}void @{{.+}}fn2 -// CHECK: call void @__kmpc_for_static_fini( -// CHECK: ret void - -// CHECK: call void @__kmpc_for_static_init_4( -// CHECK: call {{.*}}void {{.+}} @__kmpc_fork_call(%{{.+}}* @{{.+}}, i{{.+}} 2, void {{.+}}* [[T_OUTLINE_FUN_3:@.+]] to void -// CHECK: call {{.*}}void @__kmpc_serialized_parallel( -// call void [[T_OUTLINE_FUN_3:@.+]]( -// CHECK: call {{.*}}void @__kmpc_end_serialized_parallel( - -// CHECK: define internal {{.*}}void [[T_OUTLINE_FUN_3]] -// CHECK: call void @__kmpc_for_static_init_4( -// CHECK: call {{.*}}void @{{.+}}fn3 -// CHECK: call void @__kmpc_for_static_fini( -// CHECK: ret void -#endif diff --git a/test/OpenMP/distribute_parallel_for_lastprivate_codegen.cpp b/test/OpenMP/distribute_parallel_for_lastprivate_codegen.cpp deleted file mode 100644 index 2e8da79c03..0000000000 --- a/test/OpenMP/distribute_parallel_for_lastprivate_codegen.cpp +++ /dev/null @@ -1,653 +0,0 @@ -// RUN: %clang_cc1 -DLAMBDA -verify -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix LAMBDA --check-prefix LAMBDA-64 -// RUN: %clang_cc1 -DLAMBDA -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s -// RUN: %clang_cc1 -DLAMBDA -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix LAMBDA --check-prefix LAMBDA-64 -// RUN: %clang_cc1 -DLAMBDA -verify -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix LAMBDA --check-prefix LAMBDA-32 -// RUN: %clang_cc1 -DLAMBDA -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -o %t %s -// RUN: %clang_cc1 -DLAMBDA -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix LAMBDA --check-prefix LAMBDA-32 - -// RUN: %clang_cc1 -verify -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-64 -// RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s -// RUN: %clang_cc1 -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-64 -// RUN: %clang_cc1 -verify -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-32 -// RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -o %t %s -// RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-32 -// expected-no-diagnostics -#ifndef HEADER -#define HEADER - -template -struct S { - T f; - S(T a) : f(a) {} - S() : f() {} - operator T() { return T(); } - ~S() {} -}; - -// CHECK: [[S_FLOAT_TY:%.+]] = type { float } -// CHECK: [[S_INT_TY:%.+]] = type { i{{[0-9]+}} } -template -T tmain() { - S test; - T t_var = T(); - T vec[] = {1, 2}; - S s_arr[] = {1, 2}; - S &var = test; - #pragma omp target - #pragma omp teams -#pragma omp distribute parallel for lastprivate(t_var, vec, s_arr, s_arr, var, var) - for (int i = 0; i < 2; ++i) { - vec[i] = t_var; - s_arr[i] = var; - } - return T(); -} - -int main() { - static int svar; - volatile double g; - volatile double &g1 = g; - - #ifdef LAMBDA - // LAMBDA-LABEL: @main - // LAMBDA: call{{.*}} void [[OUTER_LAMBDA:@.+]]( - [&]() { - static float sfvar; - // LAMBDA: define{{.*}} internal{{.*}} void [[OUTER_LAMBDA]]( - // LAMBDA: call i{{[0-9]+}} @__tgt_target_teams( - // LAMBDA: call void [[OFFLOADING_FUN:@.+]]( - - // LAMBDA: define{{.+}} void [[OFFLOADING_FUN]]( - // LAMBDA: call {{.*}}void {{.+}} @__kmpc_fork_teams({{.+}}, i32 4, {{.+}}* [[OMP_OUTLINED:@.+]] to {{.+}}) - #pragma omp target - #pragma omp teams -#pragma omp distribute parallel for lastprivate(g, g1, svar, sfvar) - for (int i = 0; i < 2; ++i) { - // LAMBDA: define{{.*}} internal{{.*}} void [[OMP_OUTLINED]](i32* noalias %{{.+}}, i32* noalias %{{.+}}, double*{{.+}} [[G_IN:%.+]], double*{{.+}} [[G1_IN:%.+]], i{{[0-9]+}}*{{.+}} [[SVAR_IN:%.+]], float*{{.+}} [[SFVAR_IN:%.+]]) - // LAMBDA: [[G_PRIVATE_ADDR:%.+]] = alloca double*, - // LAMBDA: [[G1_PRIVATE_ADDR:%.+]] = alloca double*, - // LAMBDA: [[SVAR_PRIVATE_ADDR:%.+]] = alloca i{{[0-9]+}}*, - // LAMBDA: [[SFVAR_PRIVATE_ADDR:%.+]] = alloca float*, - // LAMBDA: [[TMP_G1:%.+]] = alloca double*, - // loop variables - // LAMBDA: {{.+}} = alloca i{{[0-9]+}}, - // LAMBDA: {{.+}} = alloca i{{[0-9]+}}, - // LAMBDA: {{.+}} = alloca i{{[0-9]+}}, - // LAMBDA: {{.+}} = alloca i{{[0-9]+}}, - // LAMBDA: [[OMP_IS_LAST:%.+]] = alloca i{{[0-9]+}}, - // LAMBDA: [[G_PRIVATE:%.+]] = alloca double, - // LAMBDA: [[G1_PRIVATE:%.+]] = alloca double, - // LAMBDA: [[TMP_G1_PRIVATE:%.+]] = alloca double*, - // LAMBDA: [[SVAR_PRIVATE:%.+]] = alloca i{{[0-9]+}}, - // LAMBDA: [[SFVAR_PRIVATE:%.+]] = alloca float, - - // init addr alloca's - // LAMBDA: store double* [[G_IN]], double** [[G_PRIVATE_ADDR]], - // LAMBDA: store double* [[G1_IN]], double** [[G1_PRIVATE_ADDR]], - // LAMBDA: store i{{[0-9]+}}* [[SVAR_IN]], i{{[0-9]+}}** [[SVAR_PRIVATE_ADDR]], - // LAMBDA: store float* [[SFVAR_IN]], float** [[SFVAR_PRIVATE_ADDR]], - - // init private variables - // LAMBDA: [[G_IN_REF:%.+]] = load double*, double** [[G_PRIVATE_ADDR]], - // LAMBDA: [[SVAR_IN_REF:%.+]] = load i{{[0-9]+}}*, i{{[0-9]+}}** [[SVAR_PRIVATE_ADDR]], - // LAMBDA: [[SFVAR_IN_REF:%.+]] = load float*, float** [[SFVAR_PRIVATE_ADDR]], - // LAMBDA: [[G1_IN_REF:%.+]] = load double*, double** [[G1_PRIVATE_ADDR]], - // LAMBDA: store double* [[G1_IN_REF]], double** [[TMP_G1]], - // LAMBDA: [[TMP_G1_VAL:%.+]] = load double*, double** [[TMP_G1]], - // LAMBDA: store double* [[G1_PRIVATE]], double** [[TMP_G1_PRIVATE]], - - // LAMBDA: call {{.*}}void @__kmpc_for_static_init_4( - // LAMBDA: [[G1_PAR:%.+]] = load{{.+}}, {{.+}} [[TMP_G1_PRIVATE]], - // LAMBDA-64: call{{.+}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}}[[OMP_PARFOR_OUTLINED:@.+]] to void ({{.+}})*), {{.+}}, {{.+}}, {{.+}} [[G_PRIVATE]], {{.+}} [[G1_PAR]], {{.+}} [[SVAR_PRIVATE]], {{.+}} [[SFVAR_PRIVATE]]) - // LAMBDA-32: call{{.+}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}}[[OMP_PARFOR_OUTLINED:@.+]] to void ({{.+}})*), {{.+}}, {{.+}}, {{.+}} [[G_PRIVATE]], {{.+}} [[G1_PAR]], {{.+}} [[SVAR_PRIVATE]], {{.+}} [[SFVAR_PRIVATE]]) - // LAMBDA: call {{.*}}void @__kmpc_for_static_fini( - - // lastprivate - // LAMBDA: [[OMP_IS_LAST_VAL:%.+]] = load i{{[0-9]+}}, i{{[0-9]+}}* [[OMP_IS_LAST]], - // LAMBDA: [[IS_LAST_IT:%.+]] = icmp ne i{{[0-9]+}} [[OMP_IS_LAST_VAL]], 0 - // LAMBDA: br i1 [[IS_LAST_IT]], label %[[OMP_LASTPRIV_BLOCK:.+]], label %[[OMP_LASTPRIV_DONE:.+]] - - // LAMBDA: [[OMP_LASTPRIV_BLOCK]]: - // LAMBDA: [[G_PRIV_VAL:%.+]] = load double, double* [[G_PRIVATE]], - // LAMBDA: store{{.*}} double [[G_PRIV_VAL]], double* [[G_IN_REF]], - // LAMBDA: [[TMP_G1_PRIV_REF:%.+]] = load double*, double** [[TMP_G1_PRIVATE]], - // LAMBDA: [[TMP_G1_PRIV_VAL:%.+]] = load double, double* [[TMP_G1_PRIV_REF]], - // LAMBDA: store{{.*}} double [[TMP_G1_PRIV_VAL]], double* [[TMP_G1_VAL]], - - // LAMBDA: [[SVAR_PRIV_VAL:%.+]] = load i{{[0-9]+}}, i{{[0-9]+}}* [[SVAR_PRIVATE]], - // LAMBDA: store i{{[0-9]+}} [[SVAR_PRIV_VAL]], i{{[0-9]+}}* [[SVAR_IN_REF]], - // LAMBDA: [[SFVAR_PRIV_VAL:%.+]] = load float, float* [[SFVAR_PRIVATE]], - // LAMBDA: store float [[SFVAR_PRIV_VAL]], float* [[SFVAR_IN_REF]], - // LAMBDA: br label %[[OMP_LASTPRIV_DONE]] - // LAMBDA: [[OMP_LASTPRIV_DONE]]: - // LAMBDA: ret - - g = 1; - g1 = 1; - svar = 3; - sfvar = 4.0; - // outlined function for 'parallel for' - // LAMBDA-64: define{{.+}} void [[OMP_PARFOR_OUTLINED]](i32* noalias %{{.+}}, i32* noalias %{{.+}}, {{.+}}, {{.+}}, {{.+}} [[G_IN:%.+]], {{.+}} [[G1_IN:%.+]], {{.+}} [[SVAR_IN:%.+]], {{.+}} [[SFVAR_IN:%.+]]) - // LAMBDA-32: define{{.+}} void [[OMP_PARFOR_OUTLINED]](i32* noalias %{{.+}}, i32* noalias %{{.+}}, {{.+}}, {{.+}}, {{.+}} [[G_IN:%.+]], {{.+}} [[G1_IN:%.+]], {{.+}} [[SVAR_IN:%.+]], {{.+}} [[SFVAR_IN:%.+]]) - - // addr alloca's - // LAMBDA: [[G_PRIVATE_ADDR:%.+]] = alloca double*, - // LAMBDA: [[G1_PRIVATE_ADDR:%.+]] = alloca double*, - // LAMBDA: [[SVAR_PRIVATE_ADDR:%.+]] = alloca i{{[0-9]+}}*, - // LAMBDA: [[SFVAR_PRIVATE_ADDR:%.+]] = alloca float*, - - // loop variables - // LAMBDA: {{.+}} = alloca i{{[0-9]+}}, - // LAMBDA: {{.+}} = alloca i{{[0-9]+}}, - // LAMBDA: {{.+}} = alloca i{{[0-9]+}}, - // LAMBDA: {{.+}} = alloca i{{[0-9]+}}, - - // private alloca's - // LAMBDA: [[OMP_IS_LAST:%.+]] = alloca i{{[0-9]+}}, - // LAMBDA: [[G_PRIVATE:%.+]] = alloca double, - // LAMBDA: [[G1_PRIVATE:%.+]] = alloca double, - // LAMBDA: [[TMP_G1_PRIVATE:%.+]] = alloca double*, - // LAMBDA: [[SVAR_PRIVATE:%.+]] = alloca i{{[0-9]+}}, - // LAMBDA: [[SFVAR_PRIVATE:%.+]] = alloca float, - - // init addr alloca's - // LAMBDA: store double* [[G_IN]], double** [[G_PRIVATE_ADDR]], - // LAMBDA: store double* [[G1_IN]], double** [[G1_PRIVATE_ADDR]], - // LAMBDA: store i{{[0-9]+}}* [[SVAR_IN]], i{{[0-9]+}}** [[SVAR_PRIVATE_ADDR]], - // LAMBDA: store float* [[SFVAR_IN]], float** [[SFVAR_PRIVATE_ADDR]], - - // init private variables - // LAMBDA: [[G_IN_REF:%.+]] = load double*, double** [[G_PRIVATE_ADDR]], - // LAMBDA: [[SVAR_IN_REF:%.+]] = load i{{[0-9]+}}*, i{{[0-9]+}}** [[SVAR_PRIVATE_ADDR]], - // LAMBDA: [[SFVAR_IN_REF:%.+]] = load float*, float** [[SFVAR_PRIVATE_ADDR]], - - // LAMBDA: [[G1_IN_REF:%.+]] = load double*, double** [[G1_PRIVATE_ADDR]], - // LAMBDA: store double* [[G1_PRIVATE]], double** [[TMP_G1]], - - // LAMBDA: call {{.*}}void @__kmpc_for_static_init_4( - - // loop body - // LAMBDA: store double 1.0{{.+}}, double* [[G_PRIVATE]], - // LAMBDA: [[TMP_G1_REF:%.+]] = load double*, double** [[TMP_G1_PRIVATE]], - // LAMBDA: store{{.+}} double 1.0{{.+}}, double* [[TMP_G1_REF]], - // LAMBDA: store i{{[0-9]+}} 3, i{{[0-9]+}}* [[SVAR_PRIVATE]], - // LAMBDA: store float 4.0{{.+}}, float* [[SFVAR_PRIVATE]], - // LAMBDA: [[G_PRIVATE_ADDR_REF:%.+]] = getelementptr inbounds %{{.+}}, %{{.+}}* [[ARG:%.+]], i{{[0-9]+}} 0, i{{[0-9]+}} 0 - // LAMBDA: store double* [[G_PRIVATE]], double** [[G_PRIVATE_ADDR_REF]], - // LAMBDA: [[TMP_PRIVATE_ADDR_REF:%.+]] = getelementptr inbounds %{{.+}}, %{{.+}}* [[ARG:%.+]], i{{[0-9]+}} 0, i{{[0-9]+}} 1 - // LAMBDA: [[G1_PRIVATE_ADDR_FROM_TMP:%.+]] = load double*, double** [[TMP_G1_PRIVATE]], - // LAMBDA: store double* [[G1_PRIVATE_ADDR_FROM_TMP]], double** [[TMP_PRIVATE_ADDR_REF]], - // LAMBDA: [[SVAR_PRIVATE_ADDR_REF:%.+]] = getelementptr inbounds %{{.+}}, %{{.+}}* [[ARG:%.+]], i{{[0-9]+}} 0, i{{[0-9]+}} 2 - // LAMBDA: store i{{[0-9]+}}* [[SVAR_PRIVATE]], i{{[0-9]+}}** [[SVAR_PRIVATE_ADDR_REF]] - // LAMBDA: [[SFVAR_PRIVATE_ADDR_REF:%.+]] = getelementptr inbounds %{{.+}}, %{{.+}}* [[ARG:%.+]], i{{[0-9]+}} 0, i{{[0-9]+}} 3 - // LAMBDA: store float* [[SFVAR_PRIVATE]], float** [[SFVAR_PRIVATE_ADDR_REF]] - // LAMBDA: call{{.*}} void [[INNER_LAMBDA:@.+]](%{{.+}}* [[ARG]]) - - // LAMBDA: call {{.*}}void @__kmpc_for_static_fini( - - // lastprivate - // LAMBDA: [[OMP_IS_LAST_VAL:%.+]] = load i{{[0-9]+}}, i{{[0-9]+}}* [[OMP_IS_LAST]], - // LAMBDA: [[IS_LAST_IT:%.+]] = icmp ne i{{[0-9]+}} [[OMP_IS_LAST_VAL]], 0 - // LAMBDA: br i1 [[IS_LAST_IT]], label %[[OMP_LASTPRIV_BLOCK:.+]], label %[[OMP_LASTPRIV_DONE:.+]] - // LAMBDA: [[OMP_LASTPRIV_BLOCK]]: - // LAMBDA: [[G_PRIV_VAL:%.+]] = load double, double* [[G_PRIVATE]], - // LAMBDA: store{{.*}} double [[G_PRIV_VAL]], double* [[G_IN_REF]], - // LAMBDA: [[TMP_G1_PRIV_REF:%.+]] = load double*, double** [[TMP_G1_PRIVATE]], - // LAMBDA: [[TMP_G1_PRIV_VAL:%.+]] = load double, double* [[TMP_G1_PRIV_REF]], - // LAMBDA: store{{.*}} double [[TMP_G1_PRIV_VAL]], double* [[G1_IN_REF]], - // LAMBDA: [[SVAR_PRIV_VAL:%.+]] = load i{{[0-9]+}}, i{{[0-9]+}}* [[SVAR_PRIVATE]], - // LAMBDA: store i{{[0-9]+}} [[SVAR_PRIV_VAL]], i{{[0-9]+}}* [[SVAR_IN_REF]], - // LAMBDA: [[SFVAR_PRIV_VAL:%.+]] = load float, float* [[SFVAR_PRIVATE]], - // LAMBDA: store float [[SFVAR_PRIV_VAL]], float* [[SFVAR_IN_REF]], - // LAMBDA: br label %[[OMP_LASTPRIV_DONE]] - // LAMBDA: [[OMP_LASTPRIV_DONE]]: - // LAMBDA: ret - - [&]() { - // LAMBDA: define {{.+}} void [[INNER_LAMBDA]](%{{.+}}* [[ARG_PTR:%.+]]) - // LAMBDA: store %{{.+}}* [[ARG_PTR]], %{{.+}}** [[ARG_PTR_REF:%.+]], - g = 2; - g1 = 2; - svar = 4; - sfvar = 8.0; - // LAMBDA: [[ARG_PTR:%.+]] = load %{{.+}}*, %{{.+}}** [[ARG_PTR_REF]] - // LAMBDA: [[G_PTR_REF:%.+]] = getelementptr inbounds %{{.+}}, %{{.+}}* [[ARG_PTR]], i{{[0-9]+}} 0, i{{[0-9]+}} 0 - // LAMBDA: [[G_REF:%.+]] = load double*, double** [[G_PTR_REF]] - // LAMBDA: store double 2.0{{.+}}, double* [[G_REF]] - - // LAMBDA: [[TMP_PTR_REF:%.+]] = getelementptr inbounds %{{.+}}, %{{.+}}* [[ARG_PTR]], i{{[0-9]+}} 0, i{{[0-9]+}} 1 - // LAMBDA: [[G1_REF:%.+]] = load double*, double** [[TMP_PTR_REF]] - // LAMBDA: store double 2.0{{.+}}, double* [[G1_REF]], - // LAMBDA: [[SVAR_PTR_REF:%.+]] = getelementptr inbounds %{{.+}}, %{{.+}}* [[ARG_PTR]], i{{[0-9]+}} 0, i{{[0-9]+}} 2 - // LAMBDA: [[SVAR_REF:%.+]] = load i{{[0-9]+}}*, i{{[0-9]+}}** [[SVAR_PTR_REF]] - // LAMBDA: store i{{[0-9]+}} 4, i{{[0-9]+}}* [[SVAR_REF]] - // LAMBDA: [[SFVAR_PTR_REF:%.+]] = getelementptr inbounds %{{.+}}, %{{.+}}* [[ARG_PTR]], i{{[0-9]+}} 0, i{{[0-9]+}} 3 - // LAMBDA: [[SFVAR_REF:%.+]] = load float*, float** [[SFVAR_PTR_REF]] - // LAMBDA: store float 8.0{{.+}}, float* [[SFVAR_REF]] - }(); - } - }(); - return 0; - #else - S test; - int t_var = 0; - int vec[] = {1, 2}; - S s_arr[] = {1, 2}; - S &var = test; - - #pragma omp target - #pragma omp teams -#pragma omp distribute parallel for lastprivate(t_var, vec, s_arr, s_arr, var, var, svar) - for (int i = 0; i < 2; ++i) { - vec[i] = t_var; - s_arr[i] = var; - } - int i; - - return tmain(); - #endif -} - -// CHECK: define{{.*}} i{{[0-9]+}} @main() -// CHECK: [[TEST:%.+]] = alloca [[S_FLOAT_TY]], -// CHECK: call {{.*}} [[S_FLOAT_TY_DEF_CONSTR:@.+]]([[S_FLOAT_TY]]* [[TEST]]) -// CHECK: call i{{[0-9]+}} @__tgt_target_teams( -// CHECK: call void [[OFFLOAD_FUN:@.+]](i{{[0-9]+}} {{.+}}, [2 x i{{[0-9]+}}]* {{.+}}, [2 x [[S_FLOAT_TY]]]* {{.+}}, [[S_FLOAT_TY]]* {{.+}}, i{{[0-9]+}} {{.+}}) -// CHECK: ret - -// CHECK: define{{.+}} [[OFFLOAD_FUN]](i{{[0-9]+}} {{.+}}, [2 x i{{[0-9]+}}]*{{.+}} {{.+}}, [2 x [[S_FLOAT_TY]]]*{{.+}} {{.+}}, [[S_FLOAT_TY]]*{{.+}} {{.+}}, i{{[0-9]+}} {{.+}}) -// CHECK: call void (%{{.+}}*, i{{[0-9]+}}, void (i{{[0-9]+}}*, i{{[0-9]+}}*, ...)*, ...) @__kmpc_fork_teams( -// CHECK: ret -// -// CHECK: define internal void [[OMP_OUTLINED:@.+]](i{{[0-9]+}}* noalias [[GTID_ADDR:%.+]], i{{[0-9]+}}* noalias %{{.+}}, i{{[0-9]+}}*{{.+}} [[T_VAR_IN:%.+]], [2 x i{{[0-9]+}}]*{{.+}} [[VEC_IN:%.+]], [2 x [[S_FLOAT_TY]]]*{{.+}} [[S_ARR_IN:%.+]], [[S_FLOAT_TY]]*{{.+}} [[VAR_IN:%.+]], i{{[0-9]+}}*{{.*}} [[S_VAR_IN:%.+]]) -// CHECK: {{.+}} = alloca i{{[0-9]+}}*, -// CHECK: {{.+}} = alloca i{{[0-9]+}}*, -// CHECK: [[T_VAR_ADDR:%.+]] = alloca i{{[0-9]+}}*, -// CHECK: [[VEC_ADDR:%.+]] = alloca [2 x i{{[0-9]+}}]*, -// CHECK: [[S_ARR_ADDR:%.+]] = alloca [2 x [[S_FLOAT_TY]]]*, -// CHECK: [[VAR_ADDR:%.+]] = alloca [[S_FLOAT_TY]]*, -// CHECK: [[SVAR_ADDR:%.+]] = alloca i{{[0-9]+}}*, -// CHECK: [[TMP:%.*]] = alloca [[S_FLOAT_TY]]*, -// skip loop variables -// CHECK: {{.+}} = alloca i{{[0-9]+}}, -// CHECK: {{.+}} = alloca i{{[0-9]+}}, -// CHECK: {{.+}} = alloca i{{[0-9]+}}, -// CHECK: {{.+}} = alloca i{{[0-9]+}}, -// CHECK: [[OMP_IS_LAST:%.+]] = alloca i{{[0-9]+}}, -// CHECK: [[T_VAR_PRIV:%.+]] = alloca i{{[0-9]+}}, -// CHECK: [[VEC_PRIV:%.+]] = alloca [2 x i{{[0-9]+}}], -// CHECK: [[S_ARR_PRIV:%.+]] = alloca [2 x [[S_FLOAT_TY]]], -// CHECK: [[VAR_PRIV:%.+]] = alloca [[S_FLOAT_TY]], -// CHECK: [[TMP_PRIV:%.+]] = alloca [[S_FLOAT_TY]]*, -// CHECK: [[S_VAR_PRIV:%.+]] = alloca i{{[0-9]+}}, - -// copy from parameters to local address variables -// CHECK: store i{{[0-9]+}}* [[T_VAR_IN]], i{{[0-9]+}}** [[T_VAR_ADDR]], -// CHECK: store [2 x i{{[0-9]+}}]* [[VEC_IN]], [2 x i{{[0-9]+}}]** [[VEC_ADDR]], -// CHECK: store [2 x [[S_FLOAT_TY]]]* [[S_ARR_IN]], [2 x [[S_FLOAT_TY]]]** [[S_ARR_ADDR]], -// CHECK: store [[S_FLOAT_TY]]* [[VAR_IN]], [[S_FLOAT_TY]]** [[VAR_ADDR]], -// CHECK: store i{{[0-9]+}}* [[S_VAR_IN]], i{{[0-9]+}}** [[SVAR_ADDR]], - -// load content of local address variables -// CHECK: [[T_VAR_ADDR_REF:%.+]] = load i{{[0-9]+}}*, i{{[0-9]+}}** [[T_VAR_ADDR]], -// CHECK: [[VEC_ADDR_REF:%.+]] = load [2 x i{{[0-9]+}}]*, [2 x i{{[0-9]+}}]** [[VEC_ADDR]], -// CHECK: [[S_ARR_ADDR_REF:%.+]] = load [2 x [[S_FLOAT_TY]]]*, [2 x [[S_FLOAT_TY]]]** [[S_ARR_ADDR]], -// CHECK: [[SVAR_ADDR_REF:%.+]] = load i{{[0-9]+}}*, i{{[0-9]+}}** [[SVAR_ADDR]], -// CHECK: [[VAR_ADDR_REF:%.+]] = load [[S_FLOAT_TY]]*, [[S_FLOAT_TY]]** [[VAR_ADDR]], -// CHECK: store [[S_FLOAT_TY]]* [[VAR_ADDR_REF]], [[S_FLOAT_TY]]** [[TMP]], -// CHECK: store i{{[0-9]+}} 0, i{{[0-9]+}}* [[OMP_IS_LAST]], - -// call constructor for s_arr -// CHECK: [[S_ARR_BGN:%.+]] = getelementptr{{.+}} [2 x [[S_FLOAT_TY]]], [2 x [[S_FLOAT_TY]]]* [[S_ARR_PRIV]], -// CHECK: [[S_ARR_END:%.+]] = getelementptr {{.+}} [[S_FLOAT_TY]], [[S_FLOAT_TY]]* [[S_ARR_BGN]], -// CHECK: br label %[[S_ARR_CST_LOOP:.+]] -// CHECK: [[S_ARR_CST_LOOP]]: -// CHECK: [[S_ARR_CTOR:%.+]] = phi {{.+}} -// CHECK: call void [[S_FLOAT_TY_DEF_CONSTR]]([[S_FLOAT_TY]]* [[S_ARR_CTOR]]) -// CHECK: [[S_ARR_NEXT:%.+]] = getelementptr {{.+}} [[S_ARR_CTOR]], -// CHECK: [[S_ARR_DONE:%.+]] = icmp {{.+}} [[S_ARR_NEXT]], [[S_ARR_END]] -// CHECK: br i1 [[S_ARR_DONE]], label %[[S_ARR_CST_END:.+]], label %[[S_ARR_CST_LOOP]] -// CHECK: [[S_ARR_CST_END]]: -// CHECK: [[TMP_REF:%.+]] = load [[S_FLOAT_TY]]*, [[S_FLOAT_TY]]** [[TMP]], -// CHECK: call void [[S_FLOAT_TY_DEF_CONSTR]]([[S_FLOAT_TY]]* [[VAR_PRIV]]) -// CHECK: store [[S_FLOAT_TY]]* [[VAR_PRIV]], [[S_FLOAT_TY]]** [[TMP_PRIV]], - -// the distribute loop -// CHECK: call void @__kmpc_for_static_init_4( -// CHECK: [[TMP_PRIV_VAL:%.+]] = load {{.+}}, {{.+}} [[TMP_PRIV]], -// CHECK-64: call{{.+}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}}[[OMP_PARFOR_OUTLINED:@.+]] to void ({{.+}})*), {{.+}}, {{.+}}, {{.+}} [[VEC_PRIV]], {{.+}} [[T_VAR_PRIV]], {{.+}} [[S_ARR_PRIV]], {{.+}} [[TMP_PRIV_VAL]], {{.+}} [[S_VAR_PRIV]]) -// CHECK-32: call{{.+}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}}[[OMP_PARFOR_OUTLINED:@.+]] to void ({{.+}})*), {{.+}}, {{.+}}, {{.+}} [[VEC_PRIV]], {{.+}} [[T_VAR_PRIV]], {{.+}} [[S_ARR_PRIV]], {{.+}} [[TMP_PRIV_VAL]], {{.+}} [[S_VAR_PRIV]]) - -// CHECK: call void @__kmpc_for_static_fini( - -// lastprivates -// CHECK: [[OMP_IS_LAST_VAL:%.+]] = load i{{[0-9]+}}, i{{[0-9]+}}* [[OMP_IS_LAST]], -// CHECK: [[IS_LAST_IT:%.+]] = icmp ne i{{[0-9]+}} [[OMP_IS_LAST_VAL]], 0 -// CHECK: br i1 [[IS_LAST_IT]], label %[[OMP_LASTPRIV_BLOCK:.+]], label %[[OMP_LASTPRIV_DONE:.+]] - -// CHECK: [[OMP_LASTPRIV_BLOCK]]: -// CHECK: [[T_VAR_VAL:%.+]] = load i{{[0-9]+}}, i{{[0-9]+}}* [[T_VAR_PRIV]], -// CHECK: store i{{[0-9]+}} [[T_VAR_VAL]], i{{[0-9]+}}* [[T_VAR_ADDR_REF]], -// CHECK: [[BCAST_VEC_ADDR_REF:%.+]] = bitcast [2 x i{{[0-9]+}}]* [[VEC_ADDR_REF]] to i8* -// CHECK: [[BCAST_VEC_PRIV:%.+]] = bitcast [2 x i{{[0-9]+}}]* [[VEC_PRIV]] to i8* -// CHECK: call void @llvm.memcpy.{{.+}}(i8* [[BCAST_VEC_ADDR_REF]], i8* [[BCAST_VEC_PRIV]], -// CHECK: [[S_ARR_BEGIN:%.+]] = getelementptr inbounds [2 x [[S_FLOAT_TY]]], [2 x [[S_FLOAT_TY]]]* [[S_ARR_ADDR_REF]], i{{[0-9]+}} 0, i{{[0-9]+}} 0 -// CHECK: [[S_ARR_PRIV_BCAST:%.+]] = bitcast [2 x [[S_FLOAT_TY]]]* [[S_ARR_PRIV]] to [[S_FLOAT_TY]]* -// CHECK: [[S_ARR_BEGIN_GEP:%.+]] = getelementptr [[S_FLOAT_TY]], [[S_FLOAT_TY]]* [[S_ARR_BEGIN]], i{{[0-9]+}} 2 -// CHECK: [[S_ARR_IS_EMPTY:%.+]] = icmp eq [[S_FLOAT_TY]]* [[S_ARR_BEGIN]], [[S_ARR_BEGIN_GEP]] -// CHECK: br i1 [[S_ARR_IS_EMPTY]], label %[[S_ARR_COPY_DONE:.+]], label %[[S_ARR_COPY_BLOCK:.+]] -// CHECK: [[S_ARR_COPY_BLOCK]]: -// CHECK: [[S_ARR_SRC_EL:%.+]] = phi [[S_FLOAT_TY]]*{{.+}} -// CHECK: [[S_ARR_DST_EL:%.+]] = phi [[S_FLOAT_TY]]*{{.+}} -// CHECK: [[S_ARR_DST_BCAST:%.+]] = bitcast [[S_FLOAT_TY]]* [[S_ARR_DST_EL]] to i8* -// CHECK: [[S_ARR_SRC_BCAST:%.+]] = bitcast [[S_FLOAT_TY]]* [[S_ARR_SRC_EL]] to i8* -// CHECK: call void @llvm.memcpy.{{.+}}(i8* [[S_ARR_DST_BCAST]], i8* [[S_ARR_SRC_BCAST]]{{.+}}) -// CHECK: [[S_ARR_DST_NEXT:%.+]] = getelementptr [[S_FLOAT_TY]], [[S_FLOAT_TY]]* [[S_ARR_DST_EL]], i{{[0-9]+}} 1 -// CHECK: [[S_ARR_SRC_NEXT:%.+]] = getelementptr{{.+}} -// CHECK: [[CPY_IS_FINISHED:%.+]] = icmp eq [[S_FLOAT_TY]]* [[S_ARR_DST_NEXT]], [[S_ARR_BEGIN_GEP]] -// CHECK: br i1 [[CPY_IS_FINISHED]], label %[[S_ARR_COPY_DONE]], label %[[S_ARR_COPY_BLOCK]] -// CHECK: [[S_ARR_COPY_DONE]]: -// CHECK: [[TMP_VAL1:%.+]] = load [[S_FLOAT_TY]]*, [[S_FLOAT_TY]]** [[TMP_PRIV]], -// CHECK: [[VAR_ADDR_REF_BCAST:%.+]] = bitcast [[S_FLOAT_TY]]* [[TMP_REF]] to i8* -// CHECK: [[TMP_VAL1_BCAST:%.+]] = bitcast [[S_FLOAT_TY]]* [[TMP_VAL1]] to i8* -// CHECK: call void @llvm.memcpy.{{.+}}(i8* [[VAR_ADDR_REF_BCAST]], i8* [[TMP_VAL1_BCAST]],{{.+}}) -// CHECK: [[SVAR_VAL:%.+]] = load i{{[0-9]+}}, i{{[0-9]+}}* [[S_VAR_PRIV]], -// CHECK: store i{{[0-9]+}} [[SVAR_VAL]], i{{[0-9]+}}* [[SVAR_ADDR_REF]], -// CHECK: ret void - -// outlined function for 'parallel for' -// CHECK-64: define{{.+}} void [[OMP_PARFOR_OUTLINED]](i32* noalias %{{.+}}, i32* noalias %{{.+}}, {{.+}}, {{.+}}, {{.+}} [[VEC_IN:%.+]], {{.+}} [[T_VAR_IN:%.+]], {{.+}} [[S_ARR_IN:%.+]], {{.+}} [[VAR_IN:%.+]], {{.+}} [[SVAR_IN:%.+]]) -// CHECK-32: define{{.+}} void [[OMP_PARFOR_OUTLINED]](i32* noalias %{{.+}}, i32* noalias %{{.+}}, {{.+}}, {{.+}}, {{.+}} [[VEC_IN:%.+]], {{.+}} [[T_VAR_IN:%.+]], {{.+}} [[S_ARR_IN:%.+]], {{.+}} [[VAR_IN:%.+]], {{.+}} [[SVAR_IN:%.+]]) - -// CHECK: {{.+}} = alloca i{{[0-9]+}}*, -// CHECK: {{.+}} = alloca i{{[0-9]+}}*, -// CHECK: [[VEC_ADDR:%.+]] = alloca [2 x i{{[0-9]+}}]*, -// CHECK: [[T_VAR_ADDR:%.+]] = alloca i{{[0-9]+}}*, -// CHECK: [[S_ARR_ADDR:%.+]] = alloca [2 x [[S_FLOAT_TY]]]*, -// CHECK: [[VAR_ADDR:%.+]] = alloca [[S_FLOAT_TY]]*, -// CHECK: [[SVAR_ADDR:%.+]] = alloca i{{[0-9]+}}*, -// skip loop variables -// CHECK: {{.+}} = alloca i{{[0-9]+}}, -// CHECK: {{.+}} = alloca i{{[0-9]+}}, -// CHECK: {{.+}} = alloca i{{[0-9]+}}, -// CHECK: {{.+}} = alloca i{{[0-9]+}}, -// CHECK: [[OMP_IS_LAST:%.+]] = alloca i{{[0-9]+}}, -// CHECK: [[T_VAR_PRIV:%.+]] = alloca i{{[0-9]+}}, -// CHECK: [[VEC_PRIV:%.+]] = alloca [2 x i{{[0-9]+}}], -// CHECK: [[S_ARR_PRIV:%.+]] = alloca [2 x [[S_FLOAT_TY]]], -// CHECK: [[VAR_PRIV:%.+]] = alloca [[S_FLOAT_TY]], -// CHECK: [[TMP_PRIV:%.+]] = alloca [[S_FLOAT_TY]]*, -// CHECK: [[S_VAR_PRIV:%.+]] = alloca i{{[0-9]+}}, - -// copy from parameters to local address variables -// CHECK: store [2 x i{{[0-9]+}}]* [[VEC_IN]], [2 x i{{[0-9]+}}]** [[VEC_ADDR]], -// CHECK: store i{{[0-9]+}}* [[T_VAR_IN]], i{{[0-9]+}}** [[T_VAR_ADDR]], -// CHECK: store [2 x [[S_FLOAT_TY]]]* [[S_ARR_IN]], [2 x [[S_FLOAT_TY]]]** [[S_ARR_ADDR]], -// CHECK: store [[S_FLOAT_TY]]* [[VAR_IN]], [[S_FLOAT_TY]]** [[VAR_ADDR]], -// CHECK: store i{{[0-9]+}}* [[S_VAR_IN]], i{{[0-9]+}}** [[SVAR_ADDR]], - -// load content of local address variables -// CHECK: [[VEC_ADDR_REF:%.+]] = load [2 x i{{[0-9]+}}]*, [2 x i{{[0-9]+}}]** [[VEC_ADDR]], -// CHECK: [[T_VAR_ADDR_REF:%.+]] = load i{{[0-9]+}}*, i{{[0-9]+}}** [[T_VAR_ADDR]], -// CHECK: [[S_ARR_ADDR_REF:%.+]] = load [2 x [[S_FLOAT_TY]]]*, [2 x [[S_FLOAT_TY]]]** [[S_ARR_ADDR]], -// CHECK: [[SVAR_ADDR_REF:%.+]] = load i{{[0-9]+}}*, i{{[0-9]+}}** [[SVAR_ADDR]], -// CHECK: store i{{[0-9]+}} 0, i{{[0-9]+}}* [[OMP_IS_LAST]], - -// call constructor for s_arr -// CHECK: [[S_ARR_BGN:%.+]] = getelementptr{{.+}} [2 x [[S_FLOAT_TY]]], [2 x [[S_FLOAT_TY]]]* [[S_ARR_PRIV]], -// CHECK: [[S_ARR_END:%.+]] = getelementptr {{.+}} [[S_FLOAT_TY]], [[S_FLOAT_TY]]* [[S_ARR_BGN]], -// CHECK: br label %[[S_ARR_CST_LOOP:.+]] -// CHECK: [[S_ARR_CST_LOOP]]: -// CHECK: [[S_ARR_CTOR:%.+]] = phi {{.+}} -// CHECK: call void [[S_FLOAT_TY_DEF_CONSTR]]([[S_FLOAT_TY]]* [[S_ARR_CTOR]]) -// CHECK: [[S_ARR_NEXT:%.+]] = getelementptr {{.+}} [[S_ARR_CTOR]], -// CHECK: [[S_ARR_DONE:%.+]] = icmp {{.+}} [[S_ARR_NEXT]], [[S_ARR_END]] -// CHECK: br i1 [[S_ARR_DONE]], label %[[S_ARR_CST_END:.+]], label %[[S_ARR_CST_LOOP]] -// CHECK: [[S_ARR_CST_END]]: -// CHECK: [[VAR_ADDR_REF:%.+]] = load [[S_FLOAT_TY]]*, [[S_FLOAT_TY]]** [[VAR_ADDR]], -// CHECK: call void [[S_FLOAT_TY_DEF_CONSTR]]([[S_FLOAT_TY]]* [[VAR_PRIV]]) -// CHECK: store [[S_FLOAT_TY]]* [[VAR_PRIV]], [[S_FLOAT_TY]]** [[TMP_PRIV]], - -// CHECK: call void @__kmpc_for_static_init_4( - -// loop body -// assignment: vec[i] = t_var; -// CHECK: [[T_VAR_PRIV_VAL:%.+]] = load i{{[0-9]+}}, i{{[0-9]+}}* [[T_VAR_PRIV]], -// CHECK: [[VEC_PTR:%.+]] = getelementptr inbounds [2 x i{{[0-9]+}}], [2 x i{{[0-9]+}}]* [[VEC_PRIV]], i{{[0-9]+}} 0, i{{[0-9]+}} {{.+}} -// CHECK: store i{{[0-9]+}} [[T_VAR_PRIV_VAL]], i{{[0-9]+}}* [[VEC_PTR]], - -// assignment: s_arr[i] = var; -// CHECK-DAG: [[S_ARR_PTR:%.+]] = getelementptr inbounds [2 x [[S_FLOAT_TY]]], [2 x [[S_FLOAT_TY]]]* [[S_ARR_PRIV]], -// CHECK-DAG: [[TMP_VAL:%.+]] = load [[S_FLOAT_TY]]*, [[S_FLOAT_TY]]** [[TMP_PRIV]], -// CHECK-DAG: [[S_ARR_PTR_BCAST:%.+]] = bitcast [[S_FLOAT_TY]]* [[S_ARR_PTR]] to i8* -// CHECK-DAG: [[TMP_VAL_BCAST:%.+]] = bitcast [[S_FLOAT_TY]]* [[TMP_VAL]] to i8* -// CHECK: call void @llvm.memcpy.{{.+}}(i8* [[S_ARR_PTR_BCAST]], i8* [[TMP_VAL_BCAST]], - -// CHECK: call void @__kmpc_for_static_fini( - -// lastprivates -// CHECK: [[OMP_IS_LAST_VAL:%.+]] = load i{{[0-9]+}}, i{{[0-9]+}}* [[OMP_IS_LAST]], -// CHECK: [[IS_LAST_IT:%.+]] = icmp ne i{{[0-9]+}} [[OMP_IS_LAST_VAL]], 0 -// CHECK: br i1 [[IS_LAST_IT]], label %[[OMP_LASTPRIV_BLOCK:.+]], label %[[OMP_LASTPRIV_DONE:.+]] - -// CHECK: [[OMP_LASTPRIV_BLOCK]]: -// CHECK: [[T_VAR_VAL:%.+]] = load i{{[0-9]+}}, i{{[0-9]+}}* [[T_VAR_PRIV]], -// CHECK: store i{{[0-9]+}} [[T_VAR_VAL]], i{{[0-9]+}}* [[T_VAR_ADDR_REF]], -// CHECK: [[BCAST_VEC_ADDR_REF:%.+]] = bitcast [2 x i{{[0-9]+}}]* [[VEC_ADDR_REF]] to i8* -// CHECK: [[BCAST_VEC_PRIV:%.+]] = bitcast [2 x i{{[0-9]+}}]* [[VEC_PRIV]] to i8* -// CHECK: call void @llvm.memcpy.{{.+}}(i8* [[BCAST_VEC_ADDR_REF]], i8* [[BCAST_VEC_PRIV]], -// CHECK: [[S_ARR_BEGIN:%.+]] = getelementptr inbounds [2 x [[S_FLOAT_TY]]], [2 x [[S_FLOAT_TY]]]* [[S_ARR_ADDR_REF]], i{{[0-9]+}} 0, i{{[0-9]+}} 0 -// CHECK: [[S_ARR_PRIV_BCAST:%.+]] = bitcast [2 x [[S_FLOAT_TY]]]* [[S_ARR_PRIV]] to [[S_FLOAT_TY]]* -// CHECK: [[S_ARR_BEGIN_GEP:%.+]] = getelementptr [[S_FLOAT_TY]], [[S_FLOAT_TY]]* [[S_ARR_BEGIN]], i{{[0-9]+}} 2 -// CHECK: [[S_ARR_IS_EMPTY:%.+]] = icmp eq [[S_FLOAT_TY]]* [[S_ARR_BEGIN]], [[S_ARR_BEGIN_GEP]] -// CHECK: br i1 [[S_ARR_IS_EMPTY]], label %[[S_ARR_COPY_DONE:.+]], label %[[S_ARR_COPY_BLOCK:.+]] -// CHECK: [[S_ARR_COPY_BLOCK]]: -// CHECK: [[S_ARR_SRC_EL:%.+]] = phi [[S_FLOAT_TY]]*{{.+}} -// CHECK: [[S_ARR_DST_EL:%.+]] = phi [[S_FLOAT_TY]]*{{.+}} -// CHECK: [[S_ARR_DST_BCAST:%.+]] = bitcast [[S_FLOAT_TY]]* [[S_ARR_DST_EL]] to i8* -// CHECK: [[S_ARR_SRC_BCAST:%.+]] = bitcast [[S_FLOAT_TY]]* [[S_ARR_SRC_EL]] to i8* -// CHECK: call void @llvm.memcpy.{{.+}}(i8* [[S_ARR_DST_BCAST]], i8* [[S_ARR_SRC_BCAST]]{{.+}}) -// CHECK: [[S_ARR_DST_NEXT:%.+]] = getelementptr [[S_FLOAT_TY]], [[S_FLOAT_TY]]* [[S_ARR_DST_EL]], i{{[0-9]+}} 1 -// CHECK: [[S_ARR_SRC_NEXT:%.+]] = getelementptr{{.+}} -// CHECK: [[CPY_IS_FINISHED:%.+]] = icmp eq [[S_FLOAT_TY]]* [[S_ARR_DST_NEXT]], [[S_ARR_BEGIN_GEP]] -// CHECK: br i1 [[CPY_IS_FINISHED]], label %[[S_ARR_COPY_DONE]], label %[[S_ARR_COPY_BLOCK]] -// CHECK: [[S_ARR_COPY_DONE]]: -// CHECK: [[TMP_VAL1:%.+]] = load [[S_FLOAT_TY]]*, [[S_FLOAT_TY]]** [[TMP_PRIV]], -// CHECK: [[VAR_ADDR_REF_BCAST:%.+]] = bitcast [[S_FLOAT_TY]]* [[VAR_ADDR_REF]] to i8* -// CHECK: [[TMP_VAL1_BCAST:%.+]] = bitcast [[S_FLOAT_TY]]* [[TMP_VAL1]] to i8* -// CHECK: call void @llvm.memcpy.{{.+}}(i8* [[VAR_ADDR_REF_BCAST]], i8* [[TMP_VAL1_BCAST]],{{.+}}) -// CHECK: [[SVAR_VAL:%.+]] = load i{{[0-9]+}}, i{{[0-9]+}}* [[S_VAR_PRIV]], -// CHECK: store i{{[0-9]+}} [[SVAR_VAL]], i{{[0-9]+}}* [[SVAR_ADDR_REF]], -// CHECK: ret void - -// template tmain -// CHECK: define{{.*}} i{{[0-9]+}} [[TMAIN_INT:@.+]]() -// CHECK: [[TEST:%.+]] = alloca [[S_INT_TY]], -// CHECK: call {{.*}} [[S_INT_TY_DEF_CONSTR:@.+]]([[S_INT_TY]]* [[TEST]]) -// CHECK: call i{{[0-9]+}} @__tgt_target_teams( -// CHECK: call void [[OFFLOAD_FUN_1:@.+]](i{{[0-9]+}} {{.+}}, [2 x i{{[0-9]+}}]* {{.+}}, [2 x [[S_INT_TY]]]* {{.+}}, [[S_INT_TY]]* {{.+}}) -// CHECK: ret - -// CHECK: define internal void [[OFFLOAD_FUN_1]]( -// CHECK: call void (%{{.+}}*, i{{[0-9]+}}, void (i{{[0-9]+}}*, i{{[0-9]+}}*, ...)*, ...) @__kmpc_fork_teams(%{{.+}}* @{{.+}}, i{{[0-9]+}} 4, -// CHECK: ret - -// CHECK: define internal void [[OMP_OUTLINED_1:@.+]](i{{[0-9]+}}* noalias [[GTID_ADDR:%.+]], i{{[0-9]+}}* noalias %{{.+}}, i{{[0-9]+}}*{{.+}} [[T_VAR_IN:%.+]], [2 x i{{[0-9]+}}]*{{.+}} [[VEC_IN:%.+]], [2 x [[S_INT_TY]]]*{{.+}} [[S_ARR_IN:%.+]], [[S_INT_TY]]*{{.+}} [[VAR_IN:%.+]]) -// skip alloca of global_tid and bound_tid -// CHECK: {{.+}} = alloca i{{[0-9]+}}*, -// CHECK: {{.+}} = alloca i{{[0-9]+}}*, -// CHECK: [[T_VAR_ADDR:%.+]] = alloca i{{[0-9]+}}*, -// CHECK: [[VEC_ADDR:%.+]] = alloca [2 x i{{[0-9]+}}]*, -// CHECK: [[S_ARR_ADDR:%.+]] = alloca [2 x [[S_INT_TY]]]*, -// CHECK: [[VAR_ADDR:%.+]] = alloca [[S_INT_TY]]*, -// CHECK: [[TMP:%.+]] = alloca [[S_INT_TY]]*, -// skip loop variables -// CHECK: {{.+}} = alloca i{{[0-9]+}}, -// CHECK: {{.+}} = alloca i{{[0-9]+}}, -// CHECK: {{.+}} = alloca i{{[0-9]+}}, -// CHECK: {{.+}} = alloca i{{[0-9]+}}, -// CHECK: [[OMP_IS_LAST:%.+]] = alloca i{{[0-9]+}}, -// CHECK: [[T_VAR_PRIV:%.+]] = alloca i{{[0-9]+}}, -// CHECK: [[VEC_PRIV:%.+]] = alloca [2 x i{{[0-9]+}}], -// CHECK: [[S_ARR_PRIV:%.+]] = alloca [2 x [[S_INT_TY]]], -// CHECK: [[VAR_PRIV:%.+]] = alloca [[S_INT_TY]], -// CHECK: [[TMP_PRIV:%.+]] = alloca [[S_INT_TY]]*, - -// skip init of bound and global tid -// CHECK: store i{{[0-9]+}}* {{.*}}, -// CHECK: store i{{[0-9]+}}* {{.*}}, -// copy from parameters to local address variables -// CHECK: store i{{[0-9]+}}* [[T_VAR_IN]], i{{[0-9]+}}** [[T_VAR_ADDR]], -// CHECK: store [2 x i{{[0-9]+}}]* [[VEC_IN]], [2 x i{{[0-9]+}}]** [[VEC_ADDR]], -// CHECK: store [2 x [[S_INT_TY]]]* [[S_ARR_IN]], [2 x [[S_INT_TY]]]** [[S_ARR_ADDR]], -// CHECK: store [[S_INT_TY]]* [[VAR_IN]], [[S_INT_TY]]** [[VAR_ADDR]], - -// load content of local address variables -// CHECK: [[T_VAR_ADDR_REF:%.+]] = load i{{[0-9]+}}*, i{{[0-9]+}}** [[T_VAR_ADDR]], -// CHECK: [[VEC_ADDR_REF:%.+]] = load [2 x i{{[0-9]+}}]*, [2 x i{{[0-9]+}}]** [[VEC_ADDR]], -// CHECK: [[S_ARR_ADDR_REF:%.+]] = load [2 x [[S_INT_TY]]]*, [2 x [[S_INT_TY]]]** [[S_ARR_ADDR]], -// CHECK: [[VAR_ADDR_REF:%.+]] = load [[S_INT_TY]]*, [[S_INT_TY]]** [[VAR_ADDR]], -// CHECK-DAG: store [[S_INT_TY]]* [[VAR_ADDR_REF]], [[S_INT_TY]]** [[TMP]], -// CHECK-DAG: store i{{[0-9]+}} 0, i{{[0-9]+}}* [[OMP_IS_LAST]], -// CHECK-DAG: [[TMP_REF:%.+]] = load [[S_INT_TY]]*, [[S_INT_TY]]** [[TMP]], - -// CHECK: call void @__kmpc_for_static_init_4( -// CHECK: [[TMP_PRIV_VAL:%.+]] = load {{.+}}, {{.+}} [[TMP_PRIV]], -// CHECK-64: call{{.+}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}}[[OMP_PARFOR_OUTLINED:@.+]] to void ({{.+}})*), {{.+}}, {{.+}}, {{.+}} [[VEC_PRIV]], {{.+}} [[T_VAR_PRIV]], {{.+}} [[S_ARR_PRIV]], {{.+}} [[TMP_PRIV_VAL]]) -// CHECK-32: call{{.+}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}}[[OMP_PARFOR_OUTLINED:@.+]] to void ({{.+}})*), {{.+}}, {{.+}}, {{.+}} [[VEC_PRIV]], {{.+}} [[T_VAR_PRIV]], {{.+}} [[S_ARR_PRIV]], {{.+}} [[TMP_PRIV_VAL]]) - -// CHECK: call void @__kmpc_for_static_fini( - -// lastprivates -// CHECK: [[OMP_IS_LAST_VAL:%.+]] = load i{{[0-9]+}}, i{{[0-9]+}}* [[OMP_IS_LAST]], -// CHECK: [[IS_LAST_IT:%.+]] = icmp ne i{{[0-9]+}} [[OMP_IS_LAST_VAL]], 0 -// CHECK: br i1 [[IS_LAST_IT]], label %[[OMP_LASTPRIV_BLOCK:.+]], label %[[OMP_LASTPRIV_DONE:.+]] - -// CHECK: [[OMP_LASTPRIV_BLOCK]]: -// CHECK: [[T_VAR_VAL:%.+]] = load i{{[0-9]+}}, i{{[0-9]+}}* [[T_VAR_PRIV]], -// CHECK: store i{{[0-9]+}} [[T_VAR_VAL]], i{{[0-9]+}}* [[T_VAR_ADDR_REF]], -// CHECK: [[BCAST_VEC_ADDR_REF:%.+]] = bitcast [2 x i{{[0-9]+}}]* [[VEC_ADDR_REF]] to i8* -// CHECK: [[BCAST_VEC_PRIV:%.+]] = bitcast [2 x i{{[0-9]+}}]* [[VEC_PRIV]] to i8* -// CHECK: call void @llvm.memcpy.{{.+}}(i8* [[BCAST_VEC_ADDR_REF]], i8* [[BCAST_VEC_PRIV]], -// CHECK: [[S_ARR_BEGIN:%.+]] = getelementptr inbounds [2 x [[S_INT_TY]]], [2 x [[S_INT_TY]]]* [[S_ARR_ADDR_REF]], i{{[0-9]+}} 0, i{{[0-9]+}} 0 -// CHECK: [[S_ARR_PRIV_BCAST:%.+]] = bitcast [2 x [[S_INT_TY]]]* [[S_ARR_PRIV]] to [[S_INT_TY]]* -// CHECK: [[S_ARR_BEGIN_GEP:%.+]] = getelementptr [[S_INT_TY]], [[S_INT_TY]]* [[S_ARR_BEGIN]], i{{[0-9]+}} 2 -// CHECK: [[S_ARR_IS_EMPTY:%.+]] = icmp eq [[S_INT_TY]]* [[S_ARR_BEGIN]], [[S_ARR_BEGIN_GEP]] -// CHECK: br i1 [[S_ARR_IS_EMPTY]], label %[[S_ARR_COPY_DONE:.+]], label %[[S_ARR_COPY_BLOCK:.+]] -// CHECK: [[S_ARR_COPY_BLOCK]]: -// CHECK: [[S_ARR_SRC_EL:%.+]] = phi [[S_INT_TY]]*{{.+}} -// CHECK: [[S_ARR_DST_EL:%.+]] = phi [[S_INT_TY]]*{{.+}} -// CHECK: [[S_ARR_DST_BCAST:%.+]] = bitcast [[S_INT_TY]]* [[S_ARR_DST_EL]] to i8* -// CHECK: [[S_ARR_SRC_BCAST:%.+]] = bitcast [[S_INT_TY]]* [[S_ARR_SRC_EL]] to i8* -// CHECK: call void @llvm.memcpy.{{.+}}(i8* [[S_ARR_DST_BCAST]], i8* [[S_ARR_SRC_BCAST]]{{.+}}) -// CHECK: [[S_ARR_DST_NEXT:%.+]] = getelementptr [[S_INT_TY]], [[S_INT_TY]]* [[S_ARR_DST_EL]], i{{[0-9]+}} 1 -// CHECK: [[S_ARR_SRC_NEXT:%.+]] = getelementptr{{.+}} -// CHECK: [[CPY_IS_FINISHED:%.+]] = icmp eq [[S_INT_TY]]* [[S_ARR_DST_NEXT]], [[S_ARR_BEGIN_GEP]] -// CHECK: br i1 [[CPY_IS_FINISHED]], label %[[S_ARR_COPY_DONE]], label %[[S_ARR_COPY_BLOCK]] -// CHECK: [[S_ARR_COPY_DONE]]: -// CHECK: [[TMP_VAL:%.+]] = load [[S_INT_TY]]*, [[S_INT_TY]]** [[TMP_PRIV]], -// CHECK: [[VAR_ADDR_REF_BCAST:%.+]] = bitcast [[S_INT_TY]]* [[TMP_REF]] to i8* -// CHECK: [[TMP_VAL_BCAST:%.+]] = bitcast [[S_INT_TY]]* [[TMP_VAL]] to i8* -// CHECK: call void @llvm.memcpy.{{.+}}(i8* [[VAR_ADDR_REF_BCAST]], i8* [[TMP_VAL_BCAST]],{{.+}}) -// CHECK: ret void - -// outlined function for 'parallel for' -// CHECK-64: define{{.+}} void [[OMP_PARFOR_OUTLINED]](i32* noalias %{{.+}}, i32* noalias %{{.+}}, {{.+}}, {{.+}}, {{.+}} [[VEC_IN:%.+]], {{.+}} [[T_VAR_IN:%.+]], {{.+}} [[S_ARR_IN:%.+]], {{.+}} [[VAR_IN:%.+]]) -// CHECK-32: define{{.+}} void [[OMP_PARFOR_OUTLINED]](i32* noalias %{{.+}}, i32* noalias %{{.+}}, {{.+}}, {{.+}}, {{.+}} [[VEC_IN:%.+]], {{.+}} [[T_VAR_IN:%.+]], {{.+}} [[S_ARR_IN:%.+]], {{.+}} [[VAR_IN:%.+]]) - -// CHECK: {{.+}} = alloca i{{[0-9]+}}*, -// CHECK: {{.+}} = alloca i{{[0-9]+}}*, -// CHECK: [[VEC_ADDR:%.+]] = alloca [2 x i{{[0-9]+}}]*, -// CHECK: [[T_VAR_ADDR:%.+]] = alloca i{{[0-9]+}}*, -// CHECK: [[S_ARR_ADDR:%.+]] = alloca [2 x [[S_INT_TY]]]*, -// CHECK: [[VAR_ADDR:%.+]] = alloca [[S_INT_TY]]*, -// skip loop variables -// CHECK: {{.+}} = alloca i{{[0-9]+}}, -// CHECK: {{.+}} = alloca i{{[0-9]+}}, -// CHECK: {{.+}} = alloca i{{[0-9]+}}, -// CHECK: {{.+}} = alloca i{{[0-9]+}}, -// CHECK: [[OMP_IS_LAST:%.+]] = alloca i{{[0-9]+}}, -// CHECK: [[T_VAR_PRIV:%.+]] = alloca i{{[0-9]+}}, -// CHECK: [[VEC_PRIV:%.+]] = alloca [2 x i{{[0-9]+}}], -// CHECK: [[S_ARR_PRIV:%.+]] = alloca [2 x [[S_INT_TY]]], -// CHECK: [[VAR_PRIV:%.+]] = alloca [[S_INT_TY]], -// CHECK: [[TMP_PRIV:%.+]] = alloca [[S_INT_TY]]*, - -// copy from parameters to local address variables -// CHECK: store [2 x i{{[0-9]+}}]* [[VEC_IN]], [2 x i{{[0-9]+}}]** [[VEC_ADDR]], -// CHECK: store i{{[0-9]+}}* [[T_VAR_IN]], i{{[0-9]+}}** [[T_VAR_ADDR]], -// CHECK: store [2 x [[S_INT_TY]]]* [[S_ARR_IN]], [2 x [[S_INT_TY]]]** [[S_ARR_ADDR]], -// CHECK: store [[S_INT_TY]]* [[VAR_IN]], [[S_INT_TY]]** [[VAR_ADDR]], - -// load content of local address variables -// CHECK: [[VEC_ADDR_REF:%.+]] = load [2 x i{{[0-9]+}}]*, [2 x i{{[0-9]+}}]** [[VEC_ADDR]], -// CHECK: [[T_VAR_ADDR_REF:%.+]] = load i{{[0-9]+}}*, i{{[0-9]+}}** [[T_VAR_ADDR]], -// CHECK: [[S_ARR_ADDR_REF:%.+]] = load [2 x [[S_INT_TY]]]*, [2 x [[S_INT_TY]]]** [[S_ARR_ADDR]], -// CHECK: store i{{[0-9]+}} 0, i{{[0-9]+}}* [[OMP_IS_LAST]], - -// call constructor for s_arr -// CHECK: [[S_ARR_BGN:%.+]] = getelementptr{{.+}} [2 x [[S_INT_TY]]], [2 x [[S_INT_TY]]]* [[S_ARR_PRIV]], -// CHECK: [[S_ARR_END:%.+]] = getelementptr {{.+}} [[S_INT_TY]], [[S_INT_TY]]* [[S_ARR_BGN]], -// CHECK: br label %[[S_ARR_CST_LOOP:.+]] -// CHECK: [[S_ARR_CST_LOOP]]: -// CHECK: [[S_ARR_CTOR:%.+]] = phi {{.+}} -// CHECK: call void [[S_INT_TY_DEF_CONSTR]]([[S_INT_TY]]* [[S_ARR_CTOR]]) -// CHECK: [[S_ARR_NEXT:%.+]] = getelementptr {{.+}} [[S_ARR_CTOR]], -// CHECK: [[S_ARR_DONE:%.+]] = icmp {{.+}} [[S_ARR_NEXT]], [[S_ARR_END]] -// CHECK: br i1 [[S_ARR_DONE]], label %[[S_ARR_CST_END:.+]], label %[[S_ARR_CST_LOOP]] -// CHECK: [[S_ARR_CST_END]]: -// CHECK: [[VAR_ADDR_REF:%.+]] = load [[S_INT_TY]]*, [[S_INT_TY]]** [[VAR_ADDR]], -// CHECK: call void [[S_INT_TY_DEF_CONSTR]]([[S_INT_TY]]* [[VAR_PRIV]]) -// CHECK: store [[S_INT_TY]]* [[VAR_PRIV]], [[S_INT_TY]]** [[TMP_PRIV]], - -// CHECK: call void @__kmpc_for_static_init_4( - -// assignment: vec[i] = t_var; -// CHECK: [[IV_VAL:%.+]] = -// CHECK: [[T_VAR_PRIV_VAL:%.+]] = load i{{[0-9]+}}, i{{[0-9]+}}* [[T_VAR_PRIV]], -// CHECK: [[VEC_PTR:%.+]] = getelementptr inbounds [2 x i{{[0-9]+}}], [2 x i{{[0-9]+}}]* [[VEC_PRIV]], i{{[0-9]+}} 0, i{{[0-9]+}} {{.+}} -// CHECK: store i{{[0-9]+}} [[T_VAR_PRIV_VAL]], i{{[0-9]+}}* [[VEC_PTR]], - -// assignment: s_arr[i] = var; -// CHECK-DAG: [[S_ARR_PTR:%.+]] = getelementptr inbounds [2 x [[S_INT_TY]]], [2 x [[S_INT_TY]]]* [[S_ARR_PRIV]], -// CHECK-DAG: [[TMP_VAL:%.+]] = load [[S_INT_TY]]*, [[S_INT_TY]]** [[TMP_PRIV]], -// CHECK-DAG: [[S_ARR_PTR_BCAST:%.+]] = bitcast [[S_INT_TY]]* [[S_ARR_PTR]] to i8* -// CHECK-DAG: [[TMP_VAL_BCAST:%.+]] = bitcast [[S_INT_TY]]* [[TMP_VAL]] to i8* -// CHECK-DAG: call void @llvm.memcpy.{{.+}}(i8* [[S_ARR_PTR_BCAST]], i8* [[TMP_VAL_BCAST]], - -// CHECK: call void @__kmpc_for_static_fini( - -// lastprivates -// CHECK: [[OMP_IS_LAST_VAL:%.+]] = load i{{[0-9]+}}, i{{[0-9]+}}* [[OMP_IS_LAST]], -// CHECK: [[IS_LAST_IT:%.+]] = icmp ne i{{[0-9]+}} [[OMP_IS_LAST_VAL]], 0 -// CHECK: br i1 [[IS_LAST_IT]], label %[[OMP_LASTPRIV_BLOCK:.+]], label %[[OMP_LASTPRIV_DONE:.+]] - -// CHECK: [[OMP_LASTPRIV_BLOCK]]: -// CHECK: [[T_VAR_VAL:%.+]] = load i{{[0-9]+}}, i{{[0-9]+}}* [[T_VAR_PRIV]], -// CHECK: store i{{[0-9]+}} [[T_VAR_VAL]], i{{[0-9]+}}* [[T_VAR_ADDR_REF]], -// CHECK: [[BCAST_VEC_ADDR_REF:%.+]] = bitcast [2 x i{{[0-9]+}}]* [[VEC_ADDR_REF]] to i8* -// CHECK: [[BCAST_VEC_PRIV:%.+]] = bitcast [2 x i{{[0-9]+}}]* [[VEC_PRIV]] to i8* -// CHECK: call void @llvm.memcpy.{{.+}}(i8* [[BCAST_VEC_ADDR_REF]], i8* [[BCAST_VEC_PRIV]], -// CHECK: [[S_ARR_BEGIN:%.+]] = getelementptr inbounds [2 x [[S_INT_TY]]], [2 x [[S_INT_TY]]]* [[S_ARR_ADDR_REF]], i{{[0-9]+}} 0, i{{[0-9]+}} 0 -// CHECK: [[S_ARR_PRIV_BCAST:%.+]] = bitcast [2 x [[S_INT_TY]]]* [[S_ARR_PRIV]] to [[S_INT_TY]]* -// CHECK: [[S_ARR_BEGIN_GEP:%.+]] = getelementptr [[S_INT_TY]], [[S_INT_TY]]* [[S_ARR_BEGIN]], i{{[0-9]+}} 2 -// CHECK: [[S_ARR_IS_EMPTY:%.+]] = icmp eq [[S_INT_TY]]* [[S_ARR_BEGIN]], [[S_ARR_BEGIN_GEP]] -// CHECK: br i1 [[S_ARR_IS_EMPTY]], label %[[S_ARR_COPY_DONE:.+]], label %[[S_ARR_COPY_BLOCK:.+]] -// CHECK: [[S_ARR_COPY_BLOCK]]: -// CHECK: [[S_ARR_SRC_EL:%.+]] = phi [[S_INT_TY]]*{{.+}} -// CHECK: [[S_ARR_DST_EL:%.+]] = phi [[S_INT_TY]]*{{.+}} -// CHECK: [[S_ARR_DST_BCAST:%.+]] = bitcast [[S_INT_TY]]* [[S_ARR_DST_EL]] to i8* -// CHECK: [[S_ARR_SRC_BCAST:%.+]] = bitcast [[S_INT_TY]]* [[S_ARR_SRC_EL]] to i8* -// CHECK: call void @llvm.memcpy.{{.+}}(i8* [[S_ARR_DST_BCAST]], i8* [[S_ARR_SRC_BCAST]]{{.+}}) -// CHECK: [[S_ARR_DST_NEXT:%.+]] = getelementptr [[S_INT_TY]], [[S_INT_TY]]* [[S_ARR_DST_EL]], i{{[0-9]+}} 1 -// CHECK: [[S_ARR_SRC_NEXT:%.+]] = getelementptr{{.+}} -// CHECK: [[CPY_IS_FINISHED:%.+]] = icmp eq [[S_INT_TY]]* [[S_ARR_DST_NEXT]], [[S_ARR_BEGIN_GEP]] -// CHECK: br i1 [[CPY_IS_FINISHED]], label %[[S_ARR_COPY_DONE]], label %[[S_ARR_COPY_BLOCK]] -// CHECK: [[S_ARR_COPY_DONE]]: -// CHECK: [[TMP_VAL1:%.+]] = load [[S_INT_TY]]*, [[S_INT_TY]]** [[TMP_PRIV]], -// CHECK: [[VAR_ADDR_REF_BCAST:%.+]] = bitcast [[S_INT_TY]]* [[VAR_ADDR_REF]] to i8* -// CHECK: [[TMP_VAL1_BCAST:%.+]] = bitcast [[S_INT_TY]]* [[TMP_VAL1]] to i8* -// CHECK: call void @llvm.memcpy.{{.+}}(i8* [[VAR_ADDR_REF_BCAST]], i8* [[TMP_VAL1_BCAST]],{{.+}}) -// CHECK: ret void - -#endif diff --git a/test/OpenMP/distribute_parallel_for_num_threads_codegen.cpp b/test/OpenMP/distribute_parallel_for_num_threads_codegen.cpp deleted file mode 100644 index 20698ce346..0000000000 --- a/test/OpenMP/distribute_parallel_for_num_threads_codegen.cpp +++ /dev/null @@ -1,121 +0,0 @@ -// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -triple %itanium_abi_triple -emit-llvm %s -fexceptions -fcxx-exceptions -o - | FileCheck %s -// RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -std=c++11 -triple %itanium_abi_triple -fexceptions -fcxx-exceptions -emit-pch -o %t %s -// RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -triple %itanium_abi_triple -fexceptions -fcxx-exceptions -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s -// expected-no-diagnostics -#ifndef HEADER -#define HEADER - -typedef __INTPTR_TYPE__ intptr_t; - -// CHECK-DAG: [[IDENT_T_TY:%.+]] = type { i32, i32, i32, i32, i8* } -// CHECK-DAG: [[S_TY:%.+]] = type { [[INTPTR_T_TY:i[0-9]+]], [[INTPTR_T_TY]], [[INTPTR_T_TY]] } -// CHECK-DAG: [[STR:@.+]] = private unnamed_addr constant [23 x i8] c";unknown;unknown;0;0;;\00" -// CHECK-DAG: [[DEF_LOC_2:@.+]] = private unnamed_addr constant [[IDENT_T_TY]] { i32 0, i32 2, i32 0, i32 0, i8* getelementptr inbounds ([23 x i8], [23 x i8]* [[STR]], i32 0, i32 0) } - -void foo(); - -struct S { - intptr_t a, b, c; - S(intptr_t a) : a(a) {} - operator char() { return a; } - ~S() {} -}; - -template -int tmain() { -#pragma omp target -#pragma omp teams -#pragma omp distribute parallel for num_threads(C) - for (int i = 0; i < 100; i++) - foo(); -#pragma omp target -#pragma omp teams -#pragma omp distribute parallel for num_threads(T(23)) - for (int i = 0; i < 100; i++) - foo(); - return 0; -} - -int main() { - S s(0); - char a = s; -// CHECK: call i{{[0-9]+}} @__tgt_target_teams( -// CHECK: call void [[OFFLOADING_FUN_0:@.+]]( -// CHECK: call i{{[0-9]+}} @__tgt_target_teams( -// CHECK: call void [[OFFLOADING_FUN_1:@.+]]( -// CHECK: invoke{{.+}} [[TMAIN_5:@.+]]() -// CHECK: invoke{{.+}} [[TMAIN_1:@.+]]() -#pragma omp target -#pragma omp teams - // CHECK: define internal void [[OFFLOADING_FUN_0]]( - // CHECK: call {{.*}}void {{.+}} @__kmpc_fork_teams({{.+}}, i32 0, {{.+}}* [[OMP_TEAMS_OUTLINED_0:@.+]] to {{.+}}) -#pragma omp distribute parallel for num_threads(2) - for (int i = 0; i < 100; i++) { - // CHECK: define{{.+}} void [[OMP_TEAMS_OUTLINED_0]]( - // CHECK: call {{.*}}void @__kmpc_push_num_threads([[IDENT_T_TY]]* [[DEF_LOC_2]], i32 {{.+}}, i32 2) - // CHECK: call {{.*}}void {{.*}} @__kmpc_fork_call( - foo(); - } -#pragma omp target -#pragma omp teams - // CHECK: define internal void [[OFFLOADING_FUN_1]]( - - // CHECK: call {{.*}}void {{.+}} @__kmpc_fork_teams({{.+}}, i32 1, {{.+}}* [[OMP_TEAMS_OUTLINED_1:@.+]] to {{.+}}) -#pragma omp distribute parallel for num_threads(a) - for (int i = 0; i < 100; i++) { - // CHECK: define{{.+}} void [[OMP_TEAMS_OUTLINED_1]]( - // CHECK-DAG: [[A_ADDR:%.+]] = alloca i8*, - // CHECK-DAG: [[A_REF:%.+]] = load i8*, i8** [[A_ADDR]], - // CHECK-DAG: [[A_VAL:%.+]] = load i8, i8* [[A_REF]], - // CHECK-DAG: [[A_EXT:%.+]] = sext i8 [[A_VAL]] to {{.+}} - // CHECK: call {{.*}}void @__kmpc_push_num_threads([[IDENT_T_TY]]* [[DEF_LOC_2]], i32 {{.+}}, i32 [[A_EXT]]) - // CHECK: call {{.*}}void {{.*}} @__kmpc_fork_call( - foo(); - } - return a + tmain() + tmain(); -} - -// tmain 5 -// CHECK-DAG: define {{.*}}i{{[0-9]+}} [[TMAIN_5]]() -// CHECK: call i{{[0-9]+}} @__tgt_target_teams( -// CHECK: call void [[T_OFFLOADING_FUN_0:@.+]]( -// CHECK: call i{{[0-9]+}} @__tgt_target_teams( -// CHECK: call void [[T_OFFLOADING_FUN_1:@.+]]( - -// tmain 1 -// CHECK-DAG: define {{.*}}i{{[0-9]+}} [[TMAIN_1]]() -// CHECK: call i{{[0-9]+}} @__tgt_target_teams( -// CHECK: call void [[T_OFFLOADING_FUN_2:@.+]]( -// CHECK: call i{{[0-9]+}} @__tgt_target_teams( -// CHECK: call void [[T_OFFLOADING_FUN_3:@.+]]( - -// CHECK: define internal void [[T_OFFLOADING_FUN_0]]( -// CHECK: call {{.*}}void {{.+}} @__kmpc_fork_teams({{.+}}, i32 0, {{.+}}* [[T_OMP_TEAMS_OUTLINED_0:@.+]] to {{.+}}) - -// CHECK: define{{.+}} void [[T_OMP_TEAMS_OUTLINED_0]]( -// CHECK: call {{.*}}void @__kmpc_push_num_threads([[IDENT_T_TY]]* [[DEF_LOC_2]], i32 {{.+}}, i32 5) -// CHECK: call {{.*}}void {{.*}} @__kmpc_fork_call( - -// CHECK: define internal void [[T_OFFLOADING_FUN_1]]( -// CHECK: call {{.*}}void {{.+}} @__kmpc_fork_teams({{.+}}, i32 0, {{.+}}* [[T_OMP_TEAMS_OUTLINED_1:@.+]] to {{.+}}) - -// CHECK: define{{.+}} void [[T_OMP_TEAMS_OUTLINED_1]]( -// CHECK: call {{.*}}void @__kmpc_push_num_threads([[IDENT_T_TY]]* [[DEF_LOC_2]], i32 {{.+}}, i32 23) -// CHECK: call {{.*}}void {{.*}} @__kmpc_fork_call( - -// CHECK: define internal void [[T_OFFLOADING_FUN_2]]( -// CHECK: call {{.*}}void {{.+}} @__kmpc_fork_teams({{.+}}, i32 0, {{.+}}* [[T_OMP_TEAMS_OUTLINED_2:@.+]] to {{.+}}) - -// CHECK: define{{.+}} void [[T_OMP_TEAMS_OUTLINED_2]]( -// CHECK: call {{.*}}void @__kmpc_push_num_threads([[IDENT_T_TY]]* [[DEF_LOC_2]], i32 {{.+}}, i32 1) -// CHECK: call {{.*}}void {{.*}} @__kmpc_fork_call( - -// CHECK: define internal void [[T_OFFLOADING_FUN_3]]( -// CHECK: call {{.*}}void {{.+}} @__kmpc_fork_teams({{.+}}, i32 0, {{.+}}* [[T_OMP_TEAMS_OUTLINED_3:@.+]] to {{.+}}) - -// CHECK: define{{.+}} void [[T_OMP_TEAMS_OUTLINED_3]]( -// CHECK-DAG: [[CALL_RES:%.+]] = invoke{{.+}} i8 [[S_TY_CHAR_OP:@.+]]([[S_TY]]* {{.+}}) -// CHECK-DAG: [[CALL_RES_SEXT:%.+]] = sext i8 [[CALL_RES]] to {{.+}} -// CHECK: call {{.*}}void @__kmpc_push_num_threads([[IDENT_T_TY]]* [[DEF_LOC_2]], i32 {{.+}}, i32 [[CALL_RES_SEXT]]) -// CHECK: call {{.*}}void {{.*}} @__kmpc_fork_call( -#endif diff --git a/test/OpenMP/distribute_parallel_for_private_codegen.cpp b/test/OpenMP/distribute_parallel_for_private_codegen.cpp deleted file mode 100644 index d8d8a9a1ed..0000000000 --- a/test/OpenMP/distribute_parallel_for_private_codegen.cpp +++ /dev/null @@ -1,297 +0,0 @@ -// RUN: %clang_cc1 -DLAMBDA -verify -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix LAMBDA --check-prefix LAMBDA-64 -// RUN: %clang_cc1 -DLAMBDA -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s -// RUN: %clang_cc1 -DLAMBDA -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix LAMBDA --check-prefix LAMBDA-64 -// RUN: %clang_cc1 -DLAMBDA -verify -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix LAMBDA --check-prefix LAMBDA-32 -// RUN: %clang_cc1 -DLAMBDA -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -o %t %s -// RUN: %clang_cc1 -DLAMBDA -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix LAMBDA --check-prefix LAMBDA-32 - -// RUN: %clang_cc1 -verify -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-64 -// RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s -// RUN: %clang_cc1 -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-64 -// RUN: %clang_cc1 -verify -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-32 -// RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -o %t %s -// RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-32 -// expected-no-diagnostics -#ifndef HEADER -#define HEADER - -template -struct S { - T f; - S(T a) : f(a) {} - S() : f() {} - operator T() { return T(); } - ~S() {} -}; - -// CHECK: [[S_FLOAT_TY:%.+]] = type { float } -// CHECK: [[S_INT_TY:%.+]] = type { i{{[0-9]+}} } -template -T tmain() { - S test; - T t_var = T(); - T vec[] = {1, 2}; - S s_arr[] = {1, 2}; - S &var = test; - #pragma omp target - #pragma omp teams - #pragma omp distribute parallel for private(t_var, vec, s_arr, s_arr, var, var) - for (int i = 0; i < 2; ++i) { - vec[i] = t_var; - s_arr[i] = var; - } - return T(); -} - -int main() { - static int svar; - volatile double g; - volatile double &g1 = g; - - #ifdef LAMBDA - // LAMBDA-LABEL: @main - // LAMBDA: call{{.*}} void [[OUTER_LAMBDA:@.+]]( - [&]() { - static float sfvar; - // LAMBDA: define{{.*}} internal{{.*}} void [[OUTER_LAMBDA]]( - // LAMBDA: call i{{[0-9]+}} @__tgt_target_teams( - // LAMBDA: call void [[OFFLOADING_FUN:@.+]]( - - // LAMBDA: define{{.+}} void [[OFFLOADING_FUN]]() - // LAMBDA: call {{.*}}void {{.+}} @__kmpc_fork_teams({{.+}}, i32 0, {{.+}}* [[OMP_OUTLINED:@.+]] to {{.+}}) - #pragma omp target - #pragma omp teams - #pragma omp distribute parallel for private(g, g1, svar, sfvar) - for (int i = 0; i < 2; ++i) { - // LAMBDA: define{{.*}} internal{{.*}} void [[OMP_OUTLINED]](i32* noalias %{{.+}}, i32* noalias %{{.+}}) - // LAMBDA: [[G_PRIVATE_ADDR:%.+]] = alloca double, - // LAMBDA: [[G1_PRIVATE_ADDR:%.+]] = alloca double, - // LAMBDA: [[TMP_PRIVATE_ADDR:%.+]] = alloca double*, - // LAMBDA: [[SVAR_PRIVATE_ADDR:%.+]] = alloca i{{[0-9]+}}, - // LAMBDA: [[SFVAR_PRIVATE_ADDR:%.+]] = alloca float, - // LAMBDA: store double* [[G1_PRIVATE_ADDR]], double** [[TMP_PRIVATE_ADDR]], - // LAMBDA: call {{.*}}void @__kmpc_for_static_init_4( - // LAMBDA: call{{.+}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}}[[OMP_PARFOR_OUTLINED:@.+]] to {{.+}}, - // LAMBDA: call {{.*}}void @__kmpc_for_static_fini( - // LAMBDA: ret void - - // LAMBDA: define{{.+}} void [[OMP_PARFOR_OUTLINED]]( - // LAMBDA: [[G_PRIVATE_ADDR:%.+]] = alloca double, - // LAMBDA: [[G1_PRIVATE_ADDR:%.+]] = alloca double, - // LAMBDA: [[TMP_PRIVATE_ADDR:%.+]] = alloca double*, - // LAMBDA: [[SVAR_PRIVATE_ADDR:%.+]] = alloca i{{[0-9]+}}, - // LAMBDA: [[SFVAR_PRIVATE_ADDR:%.+]] = alloca float, - - g = 1; - g1 = 1; - svar = 3; - sfvar = 4.0; - // LAMBDA: store double* [[G1_PRIVATE_ADDR]], double** [[TMP_PRIVATE_ADDR]], - // LAMBDA: store double 1.0{{.+}}, double* [[G_PRIVATE_ADDR]], - // LAMBDA: store i{{[0-9]+}} 3, i{{[0-9]+}}* [[SVAR_PRIVATE_ADDR]], - // LAMBDA: store float 4.0{{.+}}, float* [[SFVAR_PRIVATE_ADDR]], - // LAMBDA: [[G_PRIVATE_ADDR_REF:%.+]] = getelementptr inbounds %{{.+}}, %{{.+}}* [[ARG:%.+]], i{{[0-9]+}} 0, i{{[0-9]+}} 0 - // LAMBDA: store double* [[G_PRIVATE_ADDR]], double** [[G_PRIVATE_ADDR_REF]], - // LAMBDA: [[TMP_PRIVATE_ADDR_REF:%.+]] = getelementptr inbounds %{{.+}}, %{{.+}}* [[ARG:%.+]], i{{[0-9]+}} 0, i{{[0-9]+}} 1 - // LAMBDA: [[G1_PRIVATE_ADDR_FROM_TMP:%.+]] = load double*, double** [[TMP_PRIVATE_ADDR]], - // LAMBDA: store double* [[G1_PRIVATE_ADDR_FROM_TMP]], double** [[TMP_PRIVATE_ADDR_REF]], - // LAMBDA: [[SVAR_PRIVATE_ADDR_REF:%.+]] = getelementptr inbounds %{{.+}}, %{{.+}}* [[ARG:%.+]], i{{[0-9]+}} 0, i{{[0-9]+}} 2 - // LAMBDA: store i{{[0-9]+}}* [[SVAR_PRIVATE_ADDR]], i{{[0-9]+}}** [[SVAR_PRIVATE_ADDR_REF]] - // LAMBDA: [[SFVAR_PRIVATE_ADDR_REF:%.+]] = getelementptr inbounds %{{.+}}, %{{.+}}* [[ARG:%.+]], i{{[0-9]+}} 0, i{{[0-9]+}} 3 - // LAMBDA: store float* [[SFVAR_PRIVATE_ADDR]], float** [[SFVAR_PRIVATE_ADDR_REF]] - // LAMBDA: call{{.*}} void [[INNER_LAMBDA:@.+]](%{{.+}}* [[ARG]]) - // LAMBDA: call {{.*}}void @__kmpc_for_static_fini( - // LAMBDA: ret void - [&]() { - // LAMBDA: define {{.+}} void [[INNER_LAMBDA]](%{{.+}}* [[ARG_PTR:%.+]]) - // LAMBDA: store %{{.+}}* [[ARG_PTR]], %{{.+}}** [[ARG_PTR_REF:%.+]], - g = 2; - g1 = 2; - svar = 4; - sfvar = 8.0; - // LAMBDA: [[ARG_PTR:%.+]] = load %{{.+}}*, %{{.+}}** [[ARG_PTR_REF]] - // LAMBDA: [[G_PTR_REF:%.+]] = getelementptr inbounds %{{.+}}, %{{.+}}* [[ARG_PTR]], i{{[0-9]+}} 0, i{{[0-9]+}} 0 - // LAMBDA: [[G_REF:%.+]] = load double*, double** [[G_PTR_REF]] - // LAMBDA: store double 2.0{{.+}}, double* [[G_REF]] - - // LAMBDA: [[TMP_PTR_REF:%.+]] = getelementptr inbounds %{{.+}}, %{{.+}}* [[ARG_PTR]], i{{[0-9]+}} 0, i{{[0-9]+}} 1 - // LAMBDA: [[G1_REF:%.+]] = load double*, double** [[TMP_PTR_REF]] - // LAMBDA: store double 2.0{{.+}}, double* [[G1_REF]], - // LAMBDA: [[SVAR_PTR_REF:%.+]] = getelementptr inbounds %{{.+}}, %{{.+}}* [[ARG_PTR]], i{{[0-9]+}} 0, i{{[0-9]+}} 2 - // LAMBDA: [[SVAR_REF:%.+]] = load i{{[0-9]+}}*, i{{[0-9]+}}** [[SVAR_PTR_REF]] - // LAMBDA: store i{{[0-9]+}} 4, i{{[0-9]+}}* [[SVAR_REF]] - // LAMBDA: [[SFVAR_PTR_REF:%.+]] = getelementptr inbounds %{{.+}}, %{{.+}}* [[ARG_PTR]], i{{[0-9]+}} 0, i{{[0-9]+}} 3 - // LAMBDA: [[SFVAR_REF:%.+]] = load float*, float** [[SFVAR_PTR_REF]] - // LAMBDA: store float 8.0{{.+}}, float* [[SFVAR_REF]] - }(); - } - }(); - return 0; - #else - S test; - int t_var = 0; - int vec[] = {1, 2}; - S s_arr[] = {1, 2}; - S &var = test; - - #pragma omp target - #pragma omp teams - #pragma omp distribute parallel for private(t_var, vec, s_arr, s_arr, var, var, svar) - for (int i = 0; i < 2; ++i) { - vec[i] = t_var; - s_arr[i] = var; - } - return tmain(); - #endif -} - -// CHECK: define{{.*}} i{{[0-9]+}} @main() -// CHECK: [[TEST:%.+]] = alloca [[S_FLOAT_TY]], -// CHECK: call {{.*}} [[S_FLOAT_TY_DEF_CONSTR:@.+]]([[S_FLOAT_TY]]* [[TEST]]) -// CHECK: call i{{[0-9]+}} @__tgt_target_teams( -// CHECK: call void [[OFFLOAD_FUN_0:@.+]]( - -// CHECK: call {{.*}} [[S_FLOAT_TY_DEF_DESTR:@.+]]([[S_FLOAT_TY]]* [[TEST]]) -// CHECK: ret - -// CHECK: define{{.+}} [[OFFLOAD_FUN_0]]() -// CHECK: call void (%{{.+}}*, i{{[0-9]+}}, void (i{{[0-9]+}}*, i{{[0-9]+}}*, ...)*, ...) @__kmpc_fork_teams(%{{.+}}* @{{.+}}, i{{[0-9]+}} 0, void (i{{[0-9]+}}*, i{{[0-9]+}}*, ...)* bitcast (void (i{{[0-9]+}}*, i{{[0-9]+}}*)* [[OMP_OUTLINED_0:@.+]] to void -// CHECK: ret -// -// CHECK: define internal void [[OMP_OUTLINED_0]](i{{[0-9]+}}* noalias [[GTID_ADDR:%.+]], i{{[0-9]+}}* noalias %{{.+}}) -// CHECK: [[T_VAR_PRIV:%.+]] = alloca i{{[0-9]+}}, -// CHECK: [[VEC_PRIV:%.+]] = alloca [2 x i{{[0-9]+}}], -// CHECK: [[S_ARR_PRIV:%.+]] = alloca [2 x [[S_FLOAT_TY]]], -// CHECK-NOT: alloca [2 x [[S_FLOAT_TY]]], -// CHECK: [[VAR_PRIV:%.+]] = alloca [[S_FLOAT_TY]], -// CHECK-NOT: alloca [[S_FLOAT_TY]], -// CHECK: [[S_VAR_PRIV:%.+]] = alloca i{{[0-9]+}}, -// CHECK: store i{{[0-9]+}}* [[GTID_ADDR]], i{{[0-9]+}}** [[GTID_ADDR_REF:%.+]] -// CHECK-NOT: [[T_VAR_PRIV]] -// CHECK-NOT: [[VEC_PRIV]] -// this is the ctor loop -// CHECK: {{.+}}: -// CHECK: [[S_ARR_PRIV_ITEM:%.+]] = phi [[S_FLOAT_TY]]* -// CHECK: call {{.*}} [[S_FLOAT_TY_DEF_CONSTR]]([[S_FLOAT_TY]]* [[S_ARR_PRIV_ITEM]]) -// CHECK-NOT: [[T_VAR_PRIV]] -// CHECK-NOT: [[VEC_PRIV]] -// CHECK: call {{.*}} [[S_FLOAT_TY_DEF_CONSTR]]([[S_FLOAT_TY]]* [[VAR_PRIV]]) -// CHECK: call void @__kmpc_for_static_init_4( -// CHECK: call{{.+}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}}[[OMP_PARFOR_OUTLINED_0:@.+]] to {{.+}}, -// CHECK: call void @__kmpc_for_static_fini( - -// call destructors: var.. -// CHECK-DAG: call {{.+}} [[S_FLOAT_TY_DEF_DESTR]]([[S_FLOAT_TY]]* [[VAR_PRIV]]) - -// ..and s_arr -// CHECK: {{.+}}: -// CHECK: [[S_ARR_EL_PAST:%.+]] = phi [[S_FLOAT_TY]]* -// CHECK: [[S_ARR_PRIV_ITEM:%.+]] = getelementptr {{.+}}, {{.+}} [[S_ARR_EL_PAST]], -// CHECK: call {{.*}} [[S_FLOAT_TY_DEF_DESTR]]([[S_FLOAT_TY]]* [[S_ARR_PRIV_ITEM]]) - -// CHECK: ret void - -// By OpenMP specifications, private applies to both distribute and parallel for. -// However, the support for 'private' of 'parallel' is only used when 'parallel' -// is found alone. Therefore we only have one 'private' support for 'parallel for' -// in combination -// CHECK: define{{.+}} void [[OMP_PARFOR_OUTLINED_0]]( -// CHECK: [[T_VAR_PRIV:%t_var+]] = alloca i{{[0-9]+}}, -// CHECK: [[VEC_PRIV:%vec+]] = alloca [2 x i{{[0-9]+}}], -// CHECK: [[S_ARR_PRIV:%s_arr+]] = alloca [2 x [[S_FLOAT_TY]]], -// CHECK-NOT: alloca [2 x [[S_FLOAT_TY]]], -// CHECK: [[VAR_PRIV:%var+]] = alloca [[S_FLOAT_TY]], -// CHECK-NOT: alloca [[S_FLOAT_TY]], -// CHECK: [[S_VAR_PRIV:%svar+]] = alloca i{{[0-9]+}}, -// CHECK: store i{{[0-9]+}}* [[GTID_ADDR]], i{{[0-9]+}}** [[GTID_ADDR_REF:%.+]] -// CHECK-NOT: [[T_VAR_PRIV]] -// CHECK-NOT: [[VEC_PRIV]] -// this is the ctor loop -// CHECK: {{.+}}: -// CHECK: [[S_ARR_PRIV_ITEM:%.+]] = phi [[S_FLOAT_TY]]* -// CHECK: call {{.*}} [[S_FLOAT_TY_DEF_CONSTR]]([[S_FLOAT_TY]]* [[S_ARR_PRIV_ITEM]]) -// CHECK-NOT: [[T_VAR_PRIV]] -// CHECK-NOT: [[VEC_PRIV]] -// CHECK: call {{.*}} [[S_FLOAT_TY_DEF_CONSTR]]([[S_FLOAT_TY]]* [[VAR_PRIV]]) -// CHECK: call void @__kmpc_for_static_init_4( -// CHECK: call void @__kmpc_for_static_fini( - -// call destructors: var.. -// CHECK-DAG: call {{.+}} [[S_FLOAT_TY_DEF_DESTR]]([[S_FLOAT_TY]]* [[VAR_PRIV]]) - -// ..and s_arr -// CHECK: {{.+}}: -// CHECK: [[S_ARR_EL_PAST:%.+]] = phi [[S_FLOAT_TY]]* -// CHECK: [[S_ARR_PRIV_ITEM:%.+]] = getelementptr {{.+}}, {{.+}} [[S_ARR_EL_PAST]], -// CHECK: call {{.*}} [[S_FLOAT_TY_DEF_DESTR]]([[S_FLOAT_TY]]* [[S_ARR_PRIV_ITEM]]) - -// CHECK: ret void - -// template tmain with S_INT_TY -// CHECK: define{{.*}} i{{[0-9]+}} [[TMAIN_INT:@.+]]() -// CHECK: [[TEST:%.+]] = alloca [[S_INT_TY]], -// CHECK: call {{.*}} [[S_INT_TY_DEF_CONSTR:@.+]]([[S_INT_TY]]* [[TEST]]) -// CHECK: call i{{[0-9]+}} @__tgt_target_teams( -// CHECK: call void [[OFFLOAD_FUN_1:@.+]]( -// CHECK: call {{.*}} [[S_INT_TY_DEF_DESTR:@.+]]([[S_INT_TY]]* [[TEST]]) -// CHECK: ret - -// CHECK: ret - -// CHECK: define internal void [[OFFLOAD_FUN_1]]() -// CHECK: call void (%{{.+}}*, i{{[0-9]+}}, void (i{{[0-9]+}}*, i{{[0-9]+}}*, ...)*, ...) @__kmpc_fork_teams(%{{.+}}* @{{.+}}, i{{[0-9]+}} 0, void (i{{[0-9]+}}*, i{{[0-9]+}}*, ...)* bitcast (void (i{{[0-9]+}}*, i{{[0-9]+}}*)* [[OMP_OUTLINED_1:@.+]] to void -// CHECK: ret -// -// CHECK: define internal void [[OMP_OUTLINED_1]](i{{[0-9]+}}* noalias [[GTID_ADDR:%.+]], i{{[0-9]+}}* noalias %{{.+}}) -// CHECK: [[T_VAR_PRIV:%.+]] = alloca i{{[0-9]+}}, -// CHECK: [[VEC_PRIV:%.+]] = alloca [2 x i{{[0-9]+}}], -// CHECK: [[S_ARR_PRIV:%.+]] = alloca [2 x [[S_INT_TY]]], -// CHECK-NOT: alloca [2 x [[S_INT_TY]]], -// CHECK: [[VAR_PRIV:%.+]] = alloca [[S_INT_TY]], -// CHECK-NOT: alloca [[S_INT_TY]], -// CHECK: store i{{[0-9]+}}* [[GTID_ADDR]], i{{[0-9]+}}** [[GTID_ADDR_REF:%.+]] -// CHECK-NOT: [[T_VAR_PRIV]] -// CHECK-NOT: [[VEC_PRIV]] -// CHECK: {{.+}}: -// CHECK: [[S_ARR_PRIV_ITEM:%.+]] = phi [[S_INT_TY]]* -// CHECK: call {{.*}} [[S_INT_TY_DEF_CONSTR]]([[S_INT_TY]]* [[S_ARR_PRIV_ITEM]]) -// CHECK-NOT: [[T_VAR_PRIV]] -// CHECK-NOT: [[VEC_PRIV]] -// CHECK: call {{.*}} [[S_INT_TY_DEF_CONSTR]]([[S_INT_TY]]* [[VAR_PRIV]]) -// CHECK: call void @__kmpc_for_static_init_4( -// CHECK: call{{.+}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}}[[OMP_PARFOR_OUTLINED_1:@.+]] to {{.+}}, -// CHECK: call void @__kmpc_for_static_fini( -// CHECK: ret void - -// CHECK: define{{.+}} void [[OMP_PARFOR_OUTLINED_1]]( -// CHECK: [[T_VAR_PRIV:%t_var+]] = alloca i{{[0-9]+}}, -// CHECK: [[VEC_PRIV:%vec+]] = alloca [2 x i{{[0-9]+}}], -// CHECK: [[S_ARR_PRIV:%s_arr+]] = alloca [2 x [[S_INT_TY]]], -// CHECK-NOT: alloca [2 x [[S_INT_TY]]], -// CHECK: [[VAR_PRIV:%var+]] = alloca [[S_INT_TY]], -// CHECK-NOT: alloca [[S_INT_TY]], -// CHECK: store i{{[0-9]+}}* [[GTID_ADDR]], i{{[0-9]+}}** [[GTID_ADDR_REF:%.+]] -// CHECK-NOT: [[T_VAR_PRIV]] -// CHECK-NOT: [[VEC_PRIV]] -// this is the ctor loop -// CHECK: {{.+}}: -// CHECK: [[S_ARR_PRIV_ITEM:%.+]] = phi [[S_INT_TY]]* -// CHECK: call {{.*}} [[S_INT_TY_DEF_CONSTR]]([[S_INT_TY]]* [[S_ARR_PRIV_ITEM]]) -// CHECK-NOT: [[T_VAR_PRIV]] -// CHECK-NOT: [[VEC_PRIV]] -// CHECK: call {{.*}} [[S_INT_TY_DEF_CONSTR]]([[S_INT_TY]]* [[VAR_PRIV]]) -// CHECK: call void @__kmpc_for_static_init_4( -// CHECK: call void @__kmpc_for_static_fini( - -// call destructors: var.. -// CHECK-DAG: call {{.+}} [[S_INT_TY_DEF_DESTR]]([[S_INT_TY]]* [[VAR_PRIV]]) - -// ..and s_arr -// CHECK: {{.+}}: -// CHECK: [[S_ARR_EL_PAST:%.+]] = phi [[S_INT_TY]]* -// CHECK: [[S_ARR_PRIV_ITEM:%.+]] = getelementptr {{.+}}, {{.+}} [[S_ARR_EL_PAST]], -// CHECK: call {{.*}} [[S_INT_TY_DEF_DESTR]]([[S_INT_TY]]* [[S_ARR_PRIV_ITEM]]) - -// CHECK: ret void - -#endif diff --git a/test/OpenMP/distribute_parallel_for_proc_bind_codegen.cpp b/test/OpenMP/distribute_parallel_for_proc_bind_codegen.cpp deleted file mode 100644 index 958b4e782f..0000000000 --- a/test/OpenMP/distribute_parallel_for_proc_bind_codegen.cpp +++ /dev/null @@ -1,93 +0,0 @@ -// add -fopenmp-targets - -// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck %s -// RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s -// RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s -// expected-no-diagnostics -#ifndef HEADER -#define HEADER - -typedef __INTPTR_TYPE__ intptr_t; - -// CHECK-DAG: [[IDENT_T_TY:%.+]] = type { i32, i32, i32, i32, i8* } -// CHECK-DAG: [[STR:@.+]] = private unnamed_addr constant [23 x i8] c";unknown;unknown;0;0;;\00" -// CHECK-DAG: [[DEF_LOC_2:@.+]] = private unnamed_addr constant [[IDENT_T_TY]] { i32 0, i32 2, i32 0, i32 0, i8* getelementptr inbounds ([23 x i8], [23 x i8]* [[STR]], i32 0, i32 0) } - -void foo(); - -struct S { - intptr_t a, b, c; - S(intptr_t a) : a(a) {} - operator char() { return a; } - ~S() {} -}; - -template -T tmain() { -#pragma omp target -#pragma omp teams -#pragma omp distribute parallel for proc_bind(master) - for(int i = 0; i < 1000; i++) {} - return T(); -} - -int main() { - // CHECK-LABEL: @main -#pragma omp target -#pragma omp teams -#pragma omp distribute parallel for proc_bind(spread) - for(int i = 0; i < 1000; i++) {} -#pragma omp target -#pragma omp teams -#pragma omp distribute parallel for proc_bind(close) - for(int i = 0; i < 1000; i++) {} - return tmain(); -} - -// CHECK: call {{.*}}@__tgt_target_teams({{.+}}) -// CHECK: call void [[OFFL1:@.+]]() -// CHECK: call {{.*}}@__tgt_target_teams({{.+}}) -// CHECK: call void [[OFFL2:@.+]]() -// CHECK: [[CALL_RET:%.+]] = call{{.+}} i32 [[TMAIN:@.+]]() -// CHECK: ret i32 [[CALL_RET]] - -// CHECK: define{{.+}} void [[OFFL1]]( -// CHECK: call {{.*}}void {{.+}} @__kmpc_fork_teams({{.+}}, {{.+}}, {{.+}}* [[OMP_OUTLINED_1:@.+]] to {{.+}}) - -// CHECK: define{{.+}} [[OMP_OUTLINED_1]](i32* {{.+}} [[GTID_IN:%.+]], -// CHECK: [[GTID_ADDR:%.+]] = alloca i32*, -// CHECK: store i32* [[GTID_IN]], i32** [[GTID_ADDR]], -// CHECK: [[GTID_REF:%.+]] = load i32*, i32** [[GTID_ADDR]], -// CHECK: [[GTID_VAL:%.+]] = load i32, i32* [[GTID_REF]], -// CHECK: call {{.*}}void @__kmpc_push_proc_bind([[IDENT_T_TY]]* [[DEF_LOC_2]], i32 [[GTID_VAL]], i32 4) -// CHECK: call {{.*}}void (%ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call( -// CHECK: ret void - -// CHECK: define{{.+}} [[OFFL2]]() -// CHECK: call {{.*}}void {{.+}} @__kmpc_fork_teams({{.+}}, {{.+}}, {{.+}}* [[OMP_OUTLINED_1:@.+]] to {{.+}}) - -// CHECK: define{{.+}} [[OMP_OUTLINED_1]](i32* {{.+}} [[GTID_IN:%.+]], -// CHECK: [[GTID_ADDR:%.+]] = alloca i32*, -// CHECK: store i32* [[GTID_IN]], i32** [[GTID_ADDR]], -// CHECK: [[GTID_REF:%.+]] = load i32*, i32** [[GTID_ADDR]], -// CHECK: [[GTID_VAL:%.+]] = load i32, i32* [[GTID_REF]], -// CHECK: call {{.*}}void @__kmpc_push_proc_bind([[IDENT_T_TY]]* [[DEF_LOC_2]], i32 [[GTID_VAL]], i32 3) -// CHECK: call {{.*}}void (%ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call( -// CHECK: ret void - -// CHECK: define{{.+}} [[TMAIN]]() -// CHECK: call {{.*}}@__tgt_target_teams({{.+}}) -// CHECK: call void [[OFFL3:@.+]]() - -// CHECK: define{{.+}} [[OFFL3]]() -// CHECK: call {{.*}}void {{.+}} @__kmpc_fork_teams({{.+}}, {{.+}}, {{.+}}* [[OMP_OUTLINED_3:@.+]] to {{.+}}) - -// CHECK: define{{.+}} [[OMP_OUTLINED_3]](i32* {{.+}} [[GTID_IN:%.+]], -// CHECK: [[GTID_ADDR:%.+]] = alloca i32*, -// CHECK: store i32* [[GTID_IN]], i32** [[GTID_ADDR]], -// CHECK: [[GTID_REF:%.+]] = load i32*, i32** [[GTID_ADDR]], -// CHECK: [[GTID_VAL:%.+]] = load i32, i32* [[GTID_REF]], -// CHECK: call {{.*}}void @__kmpc_push_proc_bind([[IDENT_T_TY]]* [[DEF_LOC_2]], i32 [[GTID_VAL]], i32 2) -// CHECK: call {{.*}}void (%ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call( -// CHECK: ret void -#endif -- 2.40.0