From f860d72ec36b6b7b36e6128297eb143090ca46a3 Mon Sep 17 00:00:00 2001
From: Alexey Bataev
Date: Wed, 20 May 2015 13:12:48 +0000
Subject: [PATCH] [OPENMP] Fix codegen for ordered loop directives.

Loops with an 'ordered' clause must be generated the same way as dynamic
loops, but with static scheduling.

git-svn-id: https://llvm.org/svn/llvm-project/cfe/trunk@237788 91177308-0d34-0410-b5e6-96231b3b80d8
---
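Notes (annotation, not part of the commit message):

A loop with an 'ordered' clause has to tell the runtime when each
iteration finishes, so that threads waiting to enter their ordered region
can proceed. That handshake only exists in the dispatch-style interface,
so ordered loops now always go through __kmpc_dispatch_init_* /
__kmpc_dispatch_next_* / __kmpc_dispatch_fini_*, merely with an "ordered
static" schedule constant when the schedule is static. A minimal
reproducer (not part of the committed test; compile with e.g.
clang -fopenmp -S -emit-llvm):

  // After this patch, the loop below is initialized through
  // __kmpc_dispatch_init_4 with schedule 66 (OMP_ord_static) instead of
  // __kmpc_for_static_init_4, and each iteration is retired with
  // __kmpc_dispatch_fini_4.
  void scale(float *a, const float *b, int n) {
  #pragma omp parallel
  #pragma omp for schedule(static) ordered
    for (int i = 0; i < n; ++i) {
  #pragma omp ordered
      a[i] = 2.0f * b[i]; // ordered region: runs in iteration order
    }
  }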
 lib/CodeGen/CGOpenMPRuntime.cpp | 53 +++++++++++++++++++-------------
 lib/CodeGen/CGOpenMPRuntime.h   | 12 ++++----
 lib/CodeGen/CGStmtOpenMP.cpp    | 54 +++++++++++++++++----------------
 lib/CodeGen/CodeGenFunction.h   |  6 ++--
 test/OpenMP/ordered_codegen.cpp | 28 +++++++++--------
 5 files changed, 84 insertions(+), 69 deletions(-)

diff --git a/lib/CodeGen/CGOpenMPRuntime.cpp b/lib/CodeGen/CGOpenMPRuntime.cpp
index e96200a601..a866525454 100644
--- a/lib/CodeGen/CGOpenMPRuntime.cpp
+++ b/lib/CodeGen/CGOpenMPRuntime.cpp
@@ -1449,51 +1449,61 @@ enum OpenMPSchedType {
   OMP_sch_auto = 38,
   /// \brief Lower bound for 'ordered' versions.
   OMP_ord_lower = 64,
-  /// \brief Lower bound for 'nomerge' versions.
-  OMP_nm_lower = 160,
+  OMP_ord_static_chunked = 65,
+  OMP_ord_static = 66,
+  OMP_ord_dynamic_chunked = 67,
+  OMP_ord_guided_chunked = 68,
+  OMP_ord_runtime = 69,
+  OMP_ord_auto = 70,
+  OMP_sch_default = OMP_sch_static,
 };
 
 /// \brief Map the OpenMP loop schedule to the runtime enumeration.
 static OpenMPSchedType getRuntimeSchedule(OpenMPScheduleClauseKind ScheduleKind,
-                                          bool Chunked) {
+                                          bool Chunked, bool Ordered) {
   switch (ScheduleKind) {
   case OMPC_SCHEDULE_static:
-    return Chunked ? OMP_sch_static_chunked : OMP_sch_static;
+    return Chunked ? (Ordered ? OMP_ord_static_chunked : OMP_sch_static_chunked)
+                   : (Ordered ? OMP_ord_static : OMP_sch_static);
   case OMPC_SCHEDULE_dynamic:
-    return OMP_sch_dynamic_chunked;
+    return Ordered ? OMP_ord_dynamic_chunked : OMP_sch_dynamic_chunked;
   case OMPC_SCHEDULE_guided:
-    return OMP_sch_guided_chunked;
-  case OMPC_SCHEDULE_auto:
-    return OMP_sch_auto;
+    return Ordered ? OMP_ord_guided_chunked : OMP_sch_guided_chunked;
   case OMPC_SCHEDULE_runtime:
-    return OMP_sch_runtime;
+    return Ordered ? OMP_ord_runtime : OMP_sch_runtime;
+  case OMPC_SCHEDULE_auto:
+    return Ordered ? OMP_ord_auto : OMP_sch_auto;
   case OMPC_SCHEDULE_unknown:
     assert(!Chunked && "chunk was specified but schedule kind not known");
-    return OMP_sch_static;
+    return Ordered ? OMP_ord_static : OMP_sch_static;
   }
   llvm_unreachable("Unexpected runtime schedule");
 }
 
 bool CGOpenMPRuntime::isStaticNonchunked(OpenMPScheduleClauseKind ScheduleKind,
                                          bool Chunked) const {
-  auto Schedule = getRuntimeSchedule(ScheduleKind, Chunked);
+  auto Schedule = getRuntimeSchedule(ScheduleKind, Chunked, /*Ordered=*/false);
   return Schedule == OMP_sch_static;
 }
 
 bool CGOpenMPRuntime::isDynamic(OpenMPScheduleClauseKind ScheduleKind) const {
-  auto Schedule = getRuntimeSchedule(ScheduleKind, /* Chunked */ false);
+  auto Schedule =
+      getRuntimeSchedule(ScheduleKind, /*Chunked=*/false, /*Ordered=*/false);
   assert(Schedule != OMP_sch_static_chunked && "cannot be chunked here");
   return Schedule != OMP_sch_static;
 }
 
 void CGOpenMPRuntime::emitForInit(CodeGenFunction &CGF, SourceLocation Loc,
                                   OpenMPScheduleClauseKind ScheduleKind,
-                                  unsigned IVSize, bool IVSigned,
+                                  unsigned IVSize, bool IVSigned, bool Ordered,
                                   llvm::Value *IL, llvm::Value *LB,
                                   llvm::Value *UB, llvm::Value *ST,
                                   llvm::Value *Chunk) {
-  OpenMPSchedType Schedule = getRuntimeSchedule(ScheduleKind, Chunk != nullptr);
-  if (Schedule != OMP_sch_static && Schedule != OMP_sch_static_chunked) {
+  OpenMPSchedType Schedule =
+      getRuntimeSchedule(ScheduleKind, Chunk != nullptr, Ordered);
+  if (Ordered ||
+      (Schedule != OMP_sch_static && Schedule != OMP_sch_static_chunked &&
+       Schedule != OMP_ord_static && Schedule != OMP_ord_static_chunked)) {
     // Call __kmpc_dispatch_init(
     //          ident_t *loc, kmp_int32 tid, kmp_int32 schedule,
     //          kmp_int[32|64] lower, kmp_int[32|64] upper,
@@ -1518,12 +1528,13 @@ void CGOpenMPRuntime::emitForInit(CodeGenFunction &CGF, SourceLocation Loc,
     //          kmp_int[32|64] *p_upper, kmp_int[32|64] *p_stride,
     //          kmp_int[32|64] incr, kmp_int[32|64] chunk);
     if (Chunk == nullptr) {
-      assert(Schedule == OMP_sch_static &&
+      assert((Schedule == OMP_sch_static || Schedule == OMP_ord_static) &&
              "expected static non-chunked schedule");
       // If the Chunk was not specified in the clause - use default value 1.
       Chunk = CGF.Builder.getIntN(IVSize, 1);
     } else
-      assert(Schedule == OMP_sch_static_chunked &&
+      assert((Schedule == OMP_sch_static_chunked ||
+              Schedule == OMP_ord_static_chunked) &&
              "expected static chunked schedule");
     llvm::Value *Args[] = {
         emitUpdateLocation(CGF, Loc, OMP_IDENT_KMPC), getThreadID(CGF, Loc),
@@ -1548,10 +1559,10 @@ void CGOpenMPRuntime::emitForStaticFinish(CodeGenFunction &CGF,
                       Args);
 }
 
-void CGOpenMPRuntime::emitForOrderedDynamicIterationEnd(CodeGenFunction &CGF,
-                                                        SourceLocation Loc,
-                                                        unsigned IVSize,
-                                                        bool IVSigned) {
+void CGOpenMPRuntime::emitForOrderedIterationEnd(CodeGenFunction &CGF,
+                                                 SourceLocation Loc,
+                                                 unsigned IVSize,
+                                                 bool IVSigned) {
   // Call __kmpc_for_dynamic_fini_(4|8)[u](ident_t *loc, kmp_int32 tid);
   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc, OMP_IDENT_KMPC),
                          getThreadID(CGF, Loc)};
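Note on the new enumerators: they mirror the libomp sched_type values. The
ordered variants occupy their own band above OMP_ord_lower (64), and each
one is its unordered counterpart shifted by 32. A standalone self-check of
that invariant (a sketch; the OMP_sch_* values are the ones already present
earlier in this enum in CGOpenMPRuntime.cpp):

  #include <cassert>
  enum OpenMPSchedType {
    OMP_sch_static_chunked = 33, OMP_sch_static = 34,
    OMP_sch_dynamic_chunked = 35, OMP_sch_guided_chunked = 36,
    OMP_sch_runtime = 37, OMP_sch_auto = 38,
    OMP_ord_lower = 64,
    OMP_ord_static_chunked = 65, OMP_ord_static = 66,
    OMP_ord_dynamic_chunked = 67, OMP_ord_guided_chunked = 68,
    OMP_ord_runtime = 69, OMP_ord_auto = 70,
  };
  int main() {
    // ordered value == unordered value + 32, for every schedule kind
    assert(OMP_ord_static == OMP_sch_static + 32);
    assert(OMP_ord_static_chunked == OMP_sch_static_chunked + 32);
    assert(OMP_ord_dynamic_chunked == OMP_sch_dynamic_chunked + 32);
    assert(OMP_ord_auto == OMP_sch_auto + 32);
    return 0;
  }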
diff --git a/lib/CodeGen/CGOpenMPRuntime.h b/lib/CodeGen/CGOpenMPRuntime.h
index bcfe6103bc..f5aa4a51df 100644
--- a/lib/CodeGen/CGOpenMPRuntime.h
+++ b/lib/CodeGen/CGOpenMPRuntime.h
@@ -439,6 +439,7 @@ public:
   /// \param SchedKind Schedule kind, specified by the 'schedule' clause.
   /// \param IVSize Size of the iteration variable in bits.
   /// \param IVSigned Sign of the interation variable.
+  /// \param Ordered true if loop is ordered, false otherwise.
   /// \param IL Address of the output variable in which the flag of the
   /// last iteration is returned.
   /// \param LB Address of the output variable in which the lower iteration
@@ -452,8 +453,8 @@ public:
   ///
   virtual void emitForInit(CodeGenFunction &CGF, SourceLocation Loc,
                            OpenMPScheduleClauseKind SchedKind, unsigned IVSize,
-                           bool IVSigned, llvm::Value *IL, llvm::Value *LB,
-                           llvm::Value *UB, llvm::Value *ST,
+                           bool IVSigned, bool Ordered, llvm::Value *IL,
+                           llvm::Value *LB, llvm::Value *UB, llvm::Value *ST,
                            llvm::Value *Chunk = nullptr);
 
   /// \brief Call the appropriate runtime routine to notify that we finished
@@ -464,10 +465,9 @@ public:
   /// \param IVSize Size of the iteration variable in bits.
   /// \param IVSigned Sign of the interation variable.
   ///
-  virtual void emitForOrderedDynamicIterationEnd(CodeGenFunction &CGF,
-                                                 SourceLocation Loc,
-                                                 unsigned IVSize,
-                                                 bool IVSigned);
+  virtual void emitForOrderedIterationEnd(CodeGenFunction &CGF,
+                                          SourceLocation Loc, unsigned IVSize,
+                                          bool IVSigned);
 
   /// \brief Call the appropriate runtime routine to notify that we finished
   /// all the work with current loop.
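Note on the rename: emitForOrderedDynamicIterationEnd becomes
emitForOrderedIterationEnd because the per-iteration "finish" call is now
emitted for every ordered loop, not only dynamically scheduled ones. The
runtime entry it lowers to is chosen by the iteration variable's width and
signedness; roughly (a sketch of the naming scheme only -- dispatchFiniName
itself is hypothetical, but the resulting names match the
__kmpc_dispatch_fini_4 call checked in the updated test):

  #include <string>
  std::string dispatchFiniName(unsigned IVSize, bool IVSigned) {
    // IVSize is 32 or 64; unsigned induction variables get a 'u' suffix.
    return std::string("__kmpc_dispatch_fini_") +
           (IVSize == 32 ? "4" : "8") + (IVSigned ? "" : "u");
  }
  // dispatchFiniName(32, true)  -> "__kmpc_dispatch_fini_4"
  // dispatchFiniName(64, false) -> "__kmpc_dispatch_fini_8u"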
diff --git a/lib/CodeGen/CGStmtOpenMP.cpp b/lib/CodeGen/CGStmtOpenMP.cpp
index 61945c63ab..f04a29392c 100644
--- a/lib/CodeGen/CGStmtOpenMP.cpp
+++ b/lib/CodeGen/CGStmtOpenMP.cpp
@@ -803,15 +803,16 @@ void CodeGenFunction::EmitOMPSimdDirective(const OMPSimdDirective &S) {
 void CodeGenFunction::EmitOMPForOuterLoop(OpenMPScheduleClauseKind ScheduleKind,
                                           const OMPLoopDirective &S,
                                           OMPPrivateScope &LoopScope,
-                                          llvm::Value *LB, llvm::Value *UB,
-                                          llvm::Value *ST, llvm::Value *IL,
-                                          llvm::Value *Chunk) {
+                                          bool Ordered, llvm::Value *LB,
+                                          llvm::Value *UB, llvm::Value *ST,
+                                          llvm::Value *IL, llvm::Value *Chunk) {
   auto &RT = CGM.getOpenMPRuntime();
 
   // Dynamic scheduling of the outer loop (dynamic, guided, auto, runtime).
-  const bool Dynamic = RT.isDynamic(ScheduleKind);
+  const bool DynamicOrOrdered = Ordered || RT.isDynamic(ScheduleKind);
 
-  assert(!RT.isStaticNonchunked(ScheduleKind, /* Chunked */ Chunk != nullptr) &&
+  assert((Ordered ||
+          !RT.isStaticNonchunked(ScheduleKind, /*Chunked=*/Chunk != nullptr)) &&
          "static non-chunked schedule does not need outer loop");
 
   // Emit outer loop.
@@ -869,9 +870,10 @@ void CodeGenFunction::EmitOMPForOuterLoop(OpenMPScheduleClauseKind ScheduleKind,
   const bool IVSigned = IVExpr->getType()->hasSignedIntegerRepresentation();
 
   RT.emitForInit(
-      *this, S.getLocStart(), ScheduleKind, IVSize, IVSigned, IL, LB,
-      (Dynamic ? EmitAnyExpr(S.getLastIteration()).getScalarVal() : UB), ST,
-      Chunk);
+      *this, S.getLocStart(), ScheduleKind, IVSize, IVSigned, Ordered, IL, LB,
+      (DynamicOrOrdered ? EmitAnyExpr(S.getLastIteration()).getScalarVal()
+                        : UB),
+      ST, Chunk);
 
   auto LoopExit = getJumpDestInCurrentScope("omp.dispatch.end");
 
@@ -881,7 +883,7 @@ void CodeGenFunction::EmitOMPForOuterLoop(OpenMPScheduleClauseKind ScheduleKind,
   LoopStack.push(CondBlock);
 
   llvm::Value *BoolCondVal = nullptr;
-  if (!Dynamic) {
+  if (!DynamicOrOrdered) {
     // UB = min(UB, GlobalUB)
     EmitIgnoredExpr(S.getEnsureUpperBound());
     // IV = LB
@@ -909,21 +911,19 @@ void CodeGenFunction::EmitOMPForOuterLoop(OpenMPScheduleClauseKind ScheduleKind,
 
   // Emit "IV = LB" (in case of static schedule, we have already calculated new
   // LB for loop condition and emitted it above).
-  if (Dynamic)
+  if (DynamicOrOrdered)
     EmitIgnoredExpr(S.getInit());
 
   // Create a block for the increment.
   auto Continue = getJumpDestInCurrentScope("omp.dispatch.inc");
   BreakContinueStack.push_back(BreakContinue(LoopExit, Continue));
 
-  bool DynamicWithOrderedClause =
-      Dynamic && S.getSingleClause(OMPC_ordered) != nullptr;
   SourceLocation Loc = S.getLocStart();
   // Generate !llvm.loop.parallel metadata for loads and stores for loops with
   // dynamic/guided scheduling and without ordered clause.
   LoopStack.setParallel((ScheduleKind == OMPC_SCHEDULE_dynamic ||
                          ScheduleKind == OMPC_SCHEDULE_guided) &&
-                        !DynamicWithOrderedClause);
+                        !Ordered);
   EmitOMPInnerLoop(
       S, LoopScope.requiresCleanups(), S.getCond(/*SeparateIter=*/false),
       S.getInc(),
@@ -931,16 +931,16 @@ void CodeGenFunction::EmitOMPForOuterLoop(OpenMPScheduleClauseKind ScheduleKind,
         CGF.EmitOMPLoopBody(S);
         CGF.EmitStopPoint(&S);
       },
-      [DynamicWithOrderedClause, IVSize, IVSigned, Loc](CodeGenFunction &CGF) {
-        if (DynamicWithOrderedClause) {
-          CGF.CGM.getOpenMPRuntime().emitForOrderedDynamicIterationEnd(
+      [Ordered, IVSize, IVSigned, Loc](CodeGenFunction &CGF) {
+        if (Ordered) {
+          CGF.CGM.getOpenMPRuntime().emitForOrderedIterationEnd(
              CGF, Loc, IVSize, IVSigned);
         }
       });
   EmitBlock(Continue.getBlock());
   BreakContinueStack.pop_back();
 
-  if (!Dynamic) {
+  if (!DynamicOrOrdered) {
     // Emit "LB = LB + Stride", "UB = UB + Stride".
     EmitIgnoredExpr(S.getNextLowerBound());
     EmitIgnoredExpr(S.getNextUpperBound());
@@ -952,7 +952,7 @@ void CodeGenFunction::EmitOMPForOuterLoop(OpenMPScheduleClauseKind ScheduleKind,
   EmitBlock(LoopExit.getBlock());
 
   // Tell the runtime we are done.
-  if (!Dynamic)
+  if (!DynamicOrOrdered)
     RT.emitForStaticFinish(*this, S.getLocEnd());
 }
 
@@ -1066,16 +1066,18 @@ bool CodeGenFunction::EmitOMPWorksharingLoop(const OMPLoopDirective &S) {
     ScheduleKind = ScheduleInfo.second;
     const unsigned IVSize = getContext().getTypeSize(IVExpr->getType());
     const bool IVSigned = IVExpr->getType()->hasSignedIntegerRepresentation();
+    const bool Ordered = S.getSingleClause(OMPC_ordered) != nullptr;
     if (RT.isStaticNonchunked(ScheduleKind,
-                              /* Chunked */ Chunk != nullptr)) {
+                              /* Chunked */ Chunk != nullptr) &&
+        !Ordered) {
       // OpenMP [2.7.1, Loop Construct, Description, table 2-1]
       // When no chunk_size is specified, the iteration space is divided into
       // chunks that are approximately equal in size, and at most one chunk is
       // distributed to each thread. Note that the size of the chunks is
       // unspecified in this case.
       RT.emitForInit(*this, S.getLocStart(), ScheduleKind, IVSize, IVSigned,
-                     IL.getAddress(), LB.getAddress(), UB.getAddress(),
-                     ST.getAddress());
+                     Ordered, IL.getAddress(), LB.getAddress(),
+                     UB.getAddress(), ST.getAddress());
       // UB = min(UB, GlobalUB);
       EmitIgnoredExpr(S.getEnsureUpperBound());
       // IV = LB;
@@ -1093,9 +1095,9 @@ bool CodeGenFunction::EmitOMPWorksharingLoop(const OMPLoopDirective &S) {
     } else {
       // Emit the outer loop, which requests its work chunk [LB..UB] from
      // runtime and runs the inner loop to process it.
-      EmitOMPForOuterLoop(ScheduleKind, S, LoopScope, LB.getAddress(),
-                          UB.getAddress(), ST.getAddress(), IL.getAddress(),
-                          Chunk);
+      EmitOMPForOuterLoop(ScheduleKind, S, LoopScope, Ordered,
+                          LB.getAddress(), UB.getAddress(), ST.getAddress(),
+                          IL.getAddress(), Chunk);
     }
     EmitOMPReductionClauseFinal(S);
     // Emit final copy of the lastprivate variables if IsLastIter != 0.
@@ -1213,8 +1215,8 @@ static OpenMPDirectiveKind emitSections(CodeGenFunction &CGF,
     // Emit static non-chunked loop.
     CGF.CGM.getOpenMPRuntime().emitForInit(
         CGF, S.getLocStart(), OMPC_SCHEDULE_static, /*IVSize=*/32,
-        /*IVSigned=*/true, IL.getAddress(), LB.getAddress(), UB.getAddress(),
-        ST.getAddress());
+        /*IVSigned=*/true, /*Ordered=*/false, IL.getAddress(),
+        LB.getAddress(), UB.getAddress(), ST.getAddress());
     // UB = min(UB, GlobalUB);
     auto *UBVal = CGF.EmitLoadOfScalar(UB, S.getLocStart());
     auto *MinUBGlobalUB = CGF.Builder.CreateSelect(
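Note on EmitOMPForOuterLoop: the old 'Dynamic' flag is generalized to
'DynamicOrOrdered', so an ordered-static loop gets the same dispatch-driven
outer loop as a dynamic schedule, with the fini call as a per-iteration
hook. The emitted control flow is roughly the following (a C++ sketch of
the generated blocks, not literal codegen; ident_t is abbreviated to void*,
and outerLoopShape is a hypothetical name):

  extern "C" int __kmpc_dispatch_next_4(void *loc, int gtid, int *p_last,
                                        int *p_lb, int *p_ub, int *p_st);
  extern "C" void __kmpc_dispatch_fini_4(void *loc, int gtid);

  void outerLoopShape(void *loc, int gtid, int *last, int *lb, int *ub,
                      int *st) {
    // omp.dispatch.cond / omp.dispatch.body / omp.dispatch.inc / .end
    while (__kmpc_dispatch_next_4(loc, gtid, last, lb, ub, st)) {
      for (int iv = *lb; iv <= *ub; ++iv) {
        // ... loop body ...
        __kmpc_dispatch_fini_4(loc, gtid); // emitted for ordered loops only
      }
    }
  }

This is also why LoopStack.setParallel() can now key off the Ordered flag
directly: "ordered" no longer implies "dynamic with an ordered clause".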
diff --git a/lib/CodeGen/CodeGenFunction.h b/lib/CodeGen/CodeGenFunction.h
index be134bc5d8..650ad7b040 100644
--- a/lib/CodeGen/CodeGenFunction.h
+++ b/lib/CodeGen/CodeGenFunction.h
@@ -2198,9 +2198,9 @@ private:
 
   bool EmitOMPWorksharingLoop(const OMPLoopDirective &S);
   void EmitOMPForOuterLoop(OpenMPScheduleClauseKind ScheduleKind,
                            const OMPLoopDirective &S,
-                           OMPPrivateScope &LoopScope, llvm::Value *LB,
-                           llvm::Value *UB, llvm::Value *ST, llvm::Value *IL,
-                           llvm::Value *Chunk);
+                           OMPPrivateScope &LoopScope, bool Ordered,
+                           llvm::Value *LB, llvm::Value *UB, llvm::Value *ST,
+                           llvm::Value *IL, llvm::Value *Chunk);
 
 public:
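The test changes below pin down the new call sequence. For
schedule(static) ordered, the first updated CHECK now expects
dispatch-style initialization; written out as the call being matched (a
sketch; 'loc' stands for the ident_t argument captured as [[DEFAULT_LOC]],
and expected_init is a hypothetical wrapper):

  extern "C" void __kmpc_dispatch_init_4(void *loc, int gtid, int schedtype,
                                         int lb, int ub, int st, int chunk);
  void expected_init(void *loc, int gtid) {
    __kmpc_dispatch_init_4(loc, gtid, /*schedtype=*/66 /* OMP_ord_static */,
                           /*lb=*/0, /*ub=*/4571423, /*st=*/1, /*chunk=*/1);
  }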
diff --git a/test/OpenMP/ordered_codegen.cpp b/test/OpenMP/ordered_codegen.cpp
index c8751faf21..adc6ff6f86 100644
--- a/test/OpenMP/ordered_codegen.cpp
+++ b/test/OpenMP/ordered_codegen.cpp
@@ -12,17 +12,18 @@ void static_not_chunked(float *a, float *b, float *c, float *d) {
 // CHECK: [[GTID:%.+]] = call i32 @__kmpc_global_thread_num([[IDENT_T_TY]]* [[DEFAULT_LOC:[@%].+]])
   #pragma omp for schedule(static) ordered
-// CHECK: call void @__kmpc_for_static_init_4([[IDENT_T_TY]]* [[DEFAULT_LOC]], i32 [[GTID]], i32 34, i32* [[IS_LAST:%[^,]+]], i32* [[OMP_LB:%[^,]+]], i32* [[OMP_UB:%[^,]+]], i32* [[OMP_ST:%[^,]+]], i32 1, i32 1)
-// UB = min(UB, GlobalUB)
-// CHECK-NEXT: [[UB:%.+]] = load i32, i32* [[OMP_UB]]
-// CHECK-NEXT: [[UBCMP:%.+]] = icmp sgt i32 [[UB]], 4571423
-// CHECK-NEXT: br i1 [[UBCMP]], label [[UB_TRUE:%[^,]+]], label [[UB_FALSE:%[^,]+]]
-// CHECK: [[UBRESULT:%.+]] = phi i32 [ 4571423, [[UB_TRUE]] ], [ [[UBVAL:%[^,]+]], [[UB_FALSE]] ]
-// CHECK-NEXT: store i32 [[UBRESULT]], i32* [[OMP_UB]]
-// CHECK-NEXT: [[LB:%.+]] = load i32, i32* [[OMP_LB]]
-// CHECK-NEXT: store i32 [[LB]], i32* [[OMP_IV:[^,]+]]
+// CHECK: call void @__kmpc_dispatch_init_4([[IDENT_T_TY]]* [[DEFAULT_LOC]], i32 [[GTID]], i32 66, i32 0, i32 4571423, i32 1, i32 1)
+//
+// CHECK: [[HASWORK:%.+]] = call i32 @__kmpc_dispatch_next_4([[IDENT_T_TY]]* [[DEFAULT_LOC]], i32 [[GTID]], i32* [[OMP_ISLAST:%[^,]+]], i32* [[OMP_LB:%[^,]+]], i32* [[OMP_UB:%[^,]+]], i32* [[OMP_ST:%[^,]+]])
+// CHECK-NEXT: [[O_CMP:%.+]] = icmp ne i32 [[HASWORK]], 0
+// CHECK-NEXT: br i1 [[O_CMP]], label %[[O_LOOP1_BODY:[^,]+]], label %[[O_LOOP1_END:[^,]+]]
+
 // Loop header
+// CHECK: [[O_LOOP1_BODY]]
+// CHECK: [[LB:%.+]] = load i32, i32* [[OMP_LB]]
+// CHECK-NEXT: store i32 [[LB]], i32* [[OMP_IV:[^,]+]]
 // CHECK: [[IV:%.+]] = load i32, i32* [[OMP_IV]]
+
 // CHECK-NEXT: [[UB:%.+]] = load i32, i32* [[OMP_UB]]
 // CHECK-NEXT: [[CMP:%.+]] = icmp sle i32 [[IV]], [[UB]]
 // CHECK-NEXT: br i1 [[CMP]], label %[[LOOP1_BODY:[^,]+]], label %[[LOOP1_END:[^,]+]]
@@ -47,10 +48,11 @@ void static_not_chunked(float *a, float *b, float *c, float *d) {
 // CHECK: [[IV1_2:%.+]] = load i32, i32* [[OMP_IV]]{{.*}}
 // CHECK-NEXT: [[ADD1_2:%.+]] = add nsw i32 [[IV1_2]], 1
 // CHECK-NEXT: store i32 [[ADD1_2]], i32* [[OMP_IV]]
+// CHECK-NEXT: call void @__kmpc_dispatch_fini_4([[IDENT_T_TY]]* [[DEFAULT_LOC]], i32 [[GTID]])
 // CHECK-NEXT: br label %{{.+}}
   }
 // CHECK: [[LOOP1_END]]
-// CHECK: call void @__kmpc_for_static_fini([[IDENT_T_TY]]* [[DEFAULT_LOC]], i32 [[GTID]])
+// CHECK: [[O_LOOP1_END]]
 // CHECK: call {{.+}} @__kmpc_cancel_barrier([[IDENT_T_TY]]* [[IMPLICIT_BARRIER_LOC]], i32 [[GTID]])
 // CHECK: ret void
 }
@@ -59,7 +61,7 @@ void static_not_chunked(float *a, float *b, float *c, float *d) {
 void dynamic1(float *a, float *b, float *c, float *d) {
 // CHECK: [[GTID:%.+]] = call i32 @__kmpc_global_thread_num([[IDENT_T_TY]]* [[DEFAULT_LOC:[@%].+]])
   #pragma omp for schedule(dynamic) ordered
-// CHECK: call void @__kmpc_dispatch_init_8u([[IDENT_T_TY]]* [[DEFAULT_LOC]], i32 [[GTID]], i32 35, i64 0, i64 16908287, i64 1, i64 1)
+// CHECK: call void @__kmpc_dispatch_init_8u([[IDENT_T_TY]]* [[DEFAULT_LOC]], i32 [[GTID]], i32 67, i64 0, i64 16908287, i64 1, i64 1)
 //
 // CHECK: [[HASWORK:%.+]] = call i32 @__kmpc_dispatch_next_8u([[IDENT_T_TY]]* [[DEFAULT_LOC]], i32 [[GTID]], i32* [[OMP_ISLAST:%[^,]+]], i64* [[OMP_LB:%[^,]+]], i64* [[OMP_UB:%[^,]+]], i64* [[OMP_ST:%[^,]+]])
 // CHECK-NEXT: [[O_CMP:%.+]] = icmp ne i32 [[HASWORK]], 0
@@ -112,7 +114,7 @@ void test_auto(float *a, float *b, float *c, float *d) {
   unsigned int y = 0;
 // CHECK: [[GTID:%.+]] = call i32 @__kmpc_global_thread_num([[IDENT_T_TY]]* [[DEFAULT_LOC:[@%].+]])
   #pragma omp for schedule(auto) collapse(2) ordered
-// CHECK: call void @__kmpc_dispatch_init_8([[IDENT_T_TY]]* [[DEFAULT_LOC]], i32 [[GTID]], i32 38, i64 0, i64 [[LAST_ITER:%[^,]+]], i64 1, i64 1)
+// CHECK: call void @__kmpc_dispatch_init_8([[IDENT_T_TY]]* [[DEFAULT_LOC]], i32 [[GTID]], i32 70, i64 0, i64 [[LAST_ITER:%[^,]+]], i64 1, i64 1)
 //
 // CHECK: [[HASWORK:%.+]] = call i32 @__kmpc_dispatch_next_8([[IDENT_T_TY]]* [[DEFAULT_LOC]], i32 [[GTID]], i32* [[OMP_ISLAST:%[^,]+]], i64* [[OMP_LB:%[^,]+]], i64* [[OMP_UB:%[^,]+]], i64* [[OMP_ST:%[^,]+]])
 // CHECK-NEXT: [[O_CMP:%.+]] = icmp ne i32 [[HASWORK]], 0
@@ -165,7 +167,7 @@ void runtime(float *a, float *b, float *c, float *d) {
   int x = 0;
 // CHECK: [[GTID:%.+]] = call i32 @__kmpc_global_thread_num([[IDENT_T_TY]]* [[DEFAULT_LOC:[@%].+]])
   #pragma omp for collapse(2) schedule(runtime) ordered
-// CHECK: call void @__kmpc_dispatch_init_4([[IDENT_T_TY]]* [[DEFAULT_LOC]], i32 [[GTID]], i32 37, i32 0, i32 199, i32 1, i32 1)
+// CHECK: call void @__kmpc_dispatch_init_4([[IDENT_T_TY]]* [[DEFAULT_LOC]], i32 [[GTID]], i32 69, i32 0, i32 199, i32 1, i32 1)
 //
 // CHECK: [[HASWORK:%.+]] = call i32 @__kmpc_dispatch_next_4([[IDENT_T_TY]]* [[DEFAULT_LOC]], i32 [[GTID]], i32* [[OMP_ISLAST:%[^,]+]], i32* [[OMP_LB:%[^,]+]], i32* [[OMP_UB:%[^,]+]], i32* [[OMP_ST:%[^,]+]])
 // CHECK-NEXT: [[O_CMP:%.+]] = icmp ne i32 [[HASWORK]], 0
-- 
2.40.0
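Postscript on the semantics being preserved: even with statically assigned
chunks, ordered regions must retire in iteration order across threads,
which is exactly what the dispatch_fini handshake provides. A behavioral
check (not part of the committed test) that must print 0..15 in order
under any thread count:

  #include <cstdio>
  int main() {
  #pragma omp parallel
  #pragma omp for schedule(static) ordered
    for (int i = 0; i < 16; ++i) {
  #pragma omp ordered
      std::printf("%d\n", i);
    }
    return 0;
  }

The lit test itself can be run from a build tree with llvm-lit, e.g.
bin/llvm-lit -v tools/clang/test/OpenMP/ordered_codegen.cpp (exact paths
depend on the build layout).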