From: Alexey Bataev Date: Thu, 31 Dec 2015 06:52:34 +0000 (+0000) Subject: [OPENMP 4.5] Codegen for 'schedule' clause with monotonic/nonmonotonic modifiers. X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=1cc4fd2a8de554caa484978cb31569f6804e9065;p=clang [OPENMP 4.5] Codegen for 'schedule' clause with monotonic/nonmonotonic modifiers. OpenMP 4.5 adds support for monotonic/nonmonotonic modifiers in 'schedule' clause. Add codegen for these modifiers. git-svn-id: https://llvm.org/svn/llvm-project/cfe/trunk@256666 91177308-0d34-0410-b5e6-96231b3b80d8 --- diff --git a/lib/CodeGen/CGStmtOpenMP.cpp b/lib/CodeGen/CGStmtOpenMP.cpp index bcd2ac5192..82011984f8 100644 --- a/lib/CodeGen/CGStmtOpenMP.cpp +++ b/lib/CodeGen/CGStmtOpenMP.cpp @@ -1125,7 +1125,8 @@ emitPrivateLinearVars(CodeGenFunction &CGF, const OMPExecutableDirective &D, } static void emitSimdlenSafelenClause(CodeGenFunction &CGF, - const OMPExecutableDirective &D) { + const OMPExecutableDirective &D, + bool IsMonotonic) { if (!CGF.HaveInsertPoint()) return; if (const auto *C = D.getSingleClause()) { @@ -1136,7 +1137,8 @@ static void emitSimdlenSafelenClause(CodeGenFunction &CGF, // In presence of finite 'safelen', it may be unsafe to mark all // the memory instructions parallel, because loop-carried // dependences of 'safelen' iterations are possible. - CGF.LoopStack.setParallel(!D.getSingleClause()); + if (!IsMonotonic) + CGF.LoopStack.setParallel(!D.getSingleClause()); } else if (const auto *C = D.getSingleClause()) { RValue Len = CGF.EmitAnyExpr(C->getSafelen(), AggValueSlot::ignored(), /*ignoreResult=*/true); @@ -1149,11 +1151,12 @@ static void emitSimdlenSafelenClause(CodeGenFunction &CGF, } } -void CodeGenFunction::EmitOMPSimdInit(const OMPLoopDirective &D) { +void CodeGenFunction::EmitOMPSimdInit(const OMPLoopDirective &D, + bool IsMonotonic) { // Walk clauses and process safelen/lastprivate. - LoopStack.setParallel(); + LoopStack.setParallel(!IsMonotonic); LoopStack.setVectorizeEnable(true); - emitSimdlenSafelenClause(*this, D); + emitSimdlenSafelenClause(*this, D, IsMonotonic); } void CodeGenFunction::EmitOMPSimdFinal(const OMPLoopDirective &D) { @@ -1255,12 +1258,10 @@ void CodeGenFunction::EmitOMPSimdDirective(const OMPSimdDirective &S) { CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_simd, CodeGen); } -void CodeGenFunction::EmitOMPForOuterLoop(OpenMPScheduleClauseKind ScheduleKind, - const OMPLoopDirective &S, - OMPPrivateScope &LoopScope, - bool Ordered, Address LB, - Address UB, Address ST, - Address IL, llvm::Value *Chunk) { +void CodeGenFunction::EmitOMPForOuterLoop( + OpenMPScheduleClauseKind ScheduleKind, bool IsMonotonic, + const OMPLoopDirective &S, OMPPrivateScope &LoopScope, bool Ordered, + Address LB, Address UB, Address ST, Address IL, llvm::Value *Chunk) { auto &RT = CGM.getOpenMPRuntime(); // Dynamic scheduling of the outer loop (dynamic, guided, auto, runtime). @@ -1378,13 +1379,10 @@ void CodeGenFunction::EmitOMPForOuterLoop(OpenMPScheduleClauseKind ScheduleKind, // Generate !llvm.loop.parallel metadata for loads and stores for loops // with dynamic/guided scheduling and without ordered clause. - if (!isOpenMPSimdDirective(S.getDirectiveKind())) { - LoopStack.setParallel((ScheduleKind == OMPC_SCHEDULE_dynamic || - ScheduleKind == OMPC_SCHEDULE_guided) && - !Ordered); - } else { - EmitOMPSimdInit(S); - } + if (!isOpenMPSimdDirective(S.getDirectiveKind())) + LoopStack.setParallel(!IsMonotonic); + else + EmitOMPSimdInit(S, IsMonotonic); SourceLocation Loc = S.getLocStart(); EmitOMPInnerLoop(S, LoopScope.requiresCleanups(), S.getCond(), S.getInc(), @@ -1425,14 +1423,30 @@ static LValue EmitOMPHelperVar(CodeGenFunction &CGF, return CGF.EmitLValue(Helper); } -static std::pair +namespace { + struct ScheduleKindModifiersTy { + OpenMPScheduleClauseKind Kind; + OpenMPScheduleClauseModifier M1; + OpenMPScheduleClauseModifier M2; + ScheduleKindModifiersTy(OpenMPScheduleClauseKind Kind, + OpenMPScheduleClauseModifier M1, + OpenMPScheduleClauseModifier M2) + : Kind(Kind), M1(M1), M2(M2) {} + }; +} // namespace + +static std::pair emitScheduleClause(CodeGenFunction &CGF, const OMPLoopDirective &S, bool OuterRegion) { // Detect the loop schedule kind and chunk. auto ScheduleKind = OMPC_SCHEDULE_unknown; + OpenMPScheduleClauseModifier M1 = OMPC_SCHEDULE_MODIFIER_unknown; + OpenMPScheduleClauseModifier M2 = OMPC_SCHEDULE_MODIFIER_unknown; llvm::Value *Chunk = nullptr; if (const auto *C = S.getSingleClause()) { ScheduleKind = C->getScheduleKind(); + M1 = C->getFirstScheduleModifier(); + M2 = C->getSecondScheduleModifier(); if (const auto *Ch = C->getChunkSize()) { if (auto *ImpRef = cast_or_null(C->getHelperChunkSize())) { if (OuterRegion) { @@ -1454,7 +1468,7 @@ emitScheduleClause(CodeGenFunction &CGF, const OMPLoopDirective &S, } } } - return std::make_pair(Chunk, ScheduleKind); + return std::make_pair(Chunk, ScheduleKindModifiersTy(ScheduleKind, M1, M2)); } bool CodeGenFunction::EmitOMPWorksharingLoop(const OMPLoopDirective &S) { @@ -1530,16 +1544,21 @@ bool CodeGenFunction::EmitOMPWorksharingLoop(const OMPLoopDirective &S) { auto ScheduleInfo = emitScheduleClause(*this, S, /*OuterRegion=*/false); Chunk = ScheduleInfo.first; - ScheduleKind = ScheduleInfo.second; + ScheduleKind = ScheduleInfo.second.Kind; + const OpenMPScheduleClauseModifier M1 = ScheduleInfo.second.M1; + const OpenMPScheduleClauseModifier M2 = ScheduleInfo.second.M2; const unsigned IVSize = getContext().getTypeSize(IVExpr->getType()); const bool IVSigned = IVExpr->getType()->hasSignedIntegerRepresentation(); const bool Ordered = S.getSingleClause() != nullptr; + // OpenMP 4.5, 2.7.1 Loop Construct, Description. + // If the static schedule kind is specified or if the ordered clause is + // specified, and if no monotonic modifier is specified, the effect will + // be as if the monotonic modifier was specified. if (RT.isStaticNonchunked(ScheduleKind, /* Chunked */ Chunk != nullptr) && !Ordered) { - if (isOpenMPSimdDirective(S.getDirectiveKind())) { - EmitOMPSimdInit(S); - } + if (isOpenMPSimdDirective(S.getDirectiveKind())) + EmitOMPSimdInit(S, /*IsMonotonic=*/true); // OpenMP [2.7.1, Loop Construct, Description, table 2-1] // When no chunk_size is specified, the iteration space is divided into // chunks that are approximately equal in size, and at most one chunk is @@ -1549,7 +1568,8 @@ bool CodeGenFunction::EmitOMPWorksharingLoop(const OMPLoopDirective &S) { IVSize, IVSigned, Ordered, IL.getAddress(), LB.getAddress(), UB.getAddress(), ST.getAddress()); - auto LoopExit = getJumpDestInCurrentScope(createBasicBlock("omp.loop.exit")); + auto LoopExit = + getJumpDestInCurrentScope(createBasicBlock("omp.loop.exit")); // UB = min(UB, GlobalUB); EmitIgnoredExpr(S.getEnsureUpperBound()); // IV = LB; @@ -1566,9 +1586,14 @@ bool CodeGenFunction::EmitOMPWorksharingLoop(const OMPLoopDirective &S) { // Tell the runtime we are done. RT.emitForStaticFinish(*this, S.getLocStart()); } else { + const bool IsMonotonic = Ordered || + ScheduleKind == OMPC_SCHEDULE_static || + ScheduleKind == OMPC_SCHEDULE_unknown || + M1 == OMPC_SCHEDULE_MODIFIER_monotonic || + M2 == OMPC_SCHEDULE_MODIFIER_monotonic; // Emit the outer loop, which requests its work chunk [LB..UB] from // runtime and runs the inner loop to process it. - EmitOMPForOuterLoop(ScheduleKind, S, LoopScope, Ordered, + EmitOMPForOuterLoop(ScheduleKind, IsMonotonic, S, LoopScope, Ordered, LB.getAddress(), UB.getAddress(), ST.getAddress(), IL.getAddress(), Chunk); } diff --git a/lib/CodeGen/CodeGenFunction.h b/lib/CodeGen/CodeGenFunction.h index b4a9186462..b3d5035253 100644 --- a/lib/CodeGen/CodeGenFunction.h +++ b/lib/CodeGen/CodeGenFunction.h @@ -2365,17 +2365,17 @@ private: /// Helpers for the OpenMP loop directives. void EmitOMPLoopBody(const OMPLoopDirective &D, JumpDest LoopExit); - void EmitOMPSimdInit(const OMPLoopDirective &D); + void EmitOMPSimdInit(const OMPLoopDirective &D, bool IsMonotonic = false); void EmitOMPSimdFinal(const OMPLoopDirective &D); /// \brief Emit code for the worksharing loop-based directive. /// \return true, if this construct has any lastprivate clause, false - /// otherwise. bool EmitOMPWorksharingLoop(const OMPLoopDirective &S); void EmitOMPForOuterLoop(OpenMPScheduleClauseKind ScheduleKind, - const OMPLoopDirective &S, - OMPPrivateScope &LoopScope, bool Ordered, - Address LB, Address UB, Address ST, - Address IL, llvm::Value *Chunk); + bool IsMonotonic, const OMPLoopDirective &S, + OMPPrivateScope &LoopScope, bool Ordered, Address LB, + Address UB, Address ST, Address IL, + llvm::Value *Chunk); /// \brief Emit code for sections directive. OpenMPDirectiveKind EmitSections(const OMPExecutableDirective &S); diff --git a/test/OpenMP/for_simd_codegen.cpp b/test/OpenMP/for_simd_codegen.cpp index cc8193d318..e1aa892641 100644 --- a/test/OpenMP/for_simd_codegen.cpp +++ b/test/OpenMP/for_simd_codegen.cpp @@ -28,24 +28,24 @@ void simple(float *a, float *b, float *c, float *d) { // CHECK: [[LB_VAL:%.+]] = load i32, i32* [[LB]], // CHECK: store i32 [[LB_VAL]], i32* [[OMP_IV:%[^,]+]], -// CHECK: [[IV:%.+]] = load i32, i32* [[OMP_IV]]{{.*}}!llvm.mem.parallel_loop_access ![[SIMPLE_LOOP1_ID:[0-9]+]] -// CHECK: [[UB_VAL:%.+]] = load i32, i32* [[UB]]{{.*}}!llvm.mem.parallel_loop_access ![[SIMPLE_LOOP1_ID]] +// CHECK: [[IV:%.+]] = load i32, i32* [[OMP_IV]] +// CHECK: [[UB_VAL:%.+]] = load i32, i32* [[UB]] // CHECK-NEXT: [[CMP:%.+]] = icmp sle i32 [[IV]], [[UB_VAL]] // CHECK-NEXT: br i1 [[CMP]], label %[[SIMPLE_LOOP1_BODY:.+]], label %[[SIMPLE_LOOP1_END:[^,]+]] for (int i = 3; i < 32; i += 5) { // CHECK: [[SIMPLE_LOOP1_BODY]] // Start of body: calculate i from IV: -// CHECK: [[IV1_1:%.+]] = load i32, i32* [[OMP_IV]]{{.*}}!llvm.mem.parallel_loop_access ![[SIMPLE_LOOP1_ID]] +// CHECK: [[IV1_1:%.+]] = load i32, i32* [[OMP_IV]]{{.*}} // CHECK: [[CALC_I_1:%.+]] = mul nsw i32 [[IV1_1]], 5 // CHECK-NEXT: [[CALC_I_2:%.+]] = add nsw i32 3, [[CALC_I_1]] -// CHECK-NEXT: store i32 [[CALC_I_2]], i32* [[LC_I:.+]]{{.*}}!llvm.mem.parallel_loop_access ![[SIMPLE_LOOP1_ID]] +// CHECK-NEXT: store i32 [[CALC_I_2]], i32* [[LC_I:.+]] // ... loop body ... // End of body: store into a[i]: -// CHECK: store float [[RESULT:%.+]], float* {{%.+}}{{.*}}!llvm.mem.parallel_loop_access ![[SIMPLE_LOOP1_ID]] +// CHECK: store float [[RESULT:%.+]], float* a[i] = b[i] * c[i] * d[i]; -// CHECK: [[IV1_2:%.+]] = load i32, i32* [[OMP_IV]]{{.*}}!llvm.mem.parallel_loop_access ![[SIMPLE_LOOP1_ID]] +// CHECK: [[IV1_2:%.+]] = load i32, i32* [[OMP_IV]] // CHECK-NEXT: [[ADD1_2:%.+]] = add nsw i32 [[IV1_2]], 1 -// CHECK-NEXT: store i32 [[ADD1_2]], i32* [[OMP_IV]]{{.*}}!llvm.mem.parallel_loop_access ![[SIMPLE_LOOP1_ID]] +// CHECK-NEXT: store i32 [[ADD1_2]], i32* [[OMP_IV]] // br label %{{.+}}, !llvm.loop !{{.+}} } // CHECK: [[SIMPLE_LOOP1_END]] @@ -132,36 +132,36 @@ void simple(float *a, float *b, float *c, float *d) { // CHECK: [[LB_VAL:%.+]] = load i64, i64* [[LB]], // CHECK: store i64 [[LB_VAL]], i64* [[OMP_IV3:%[^,]+]], -// CHECK: [[IV3:%.+]] = load i64, i64* [[OMP_IV3]]{{.*}}!llvm.mem.parallel_loop_access ![[SIMPLE_LOOP3_ID:[0-9]+]] -// CHECK: [[UB_VAL:%.+]] = load i64, i64* [[UB]]{{.*}}!llvm.mem.parallel_loop_access ![[SIMPLE_LOOP3_ID]] +// CHECK: [[IV3:%.+]] = load i64, i64* [[OMP_IV3]] +// CHECK: [[UB_VAL:%.+]] = load i64, i64* [[UB]] // CHECK-NEXT: [[CMP3:%.+]] = icmp ule i64 [[IV3]], [[UB_VAL]] // CHECK-NEXT: br i1 [[CMP3]], label %[[SIMPLE_LOOP3_BODY:.+]], label %[[SIMPLE_LOOP3_END:[^,]+]] for (unsigned long long it = 2000; it >= 600; it-=400) { // CHECK: [[SIMPLE_LOOP3_BODY]] // Start of body: calculate it from IV: -// CHECK: [[IV3_0:%.+]] = load i64, i64* [[OMP_IV3]]{{.*}}!llvm.mem.parallel_loop_access ![[SIMPLE_LOOP3_ID]] +// CHECK: [[IV3_0:%.+]] = load i64, i64* [[OMP_IV3]] // CHECK-NEXT: [[LC_IT_1:%.+]] = mul i64 [[IV3_0]], 400 // CHECK-NEXT: [[LC_IT_2:%.+]] = sub i64 2000, [[LC_IT_1]] -// CHECK-NEXT: store i64 [[LC_IT_2]], i64* {{.+}}, !llvm.mem.parallel_loop_access ![[SIMPLE_LOOP3_ID]] +// CHECK-NEXT: store i64 [[LC_IT_2]], i64* // // Linear start and step are used to calculate current value of the linear variable. -// CHECK: [[LINSTART:.+]] = load i32, i32* [[LIN_START]]{{.*}}!llvm.mem.parallel_loop_access ![[SIMPLE_LOOP3_ID]] -// CHECK: [[LINSTEP:.+]] = load i64, i64* [[LIN_STEP]]{{.*}}!llvm.mem.parallel_loop_access ![[SIMPLE_LOOP3_ID]] -// CHECK-NOT: store i32 {{.+}}, i32* [[LIN_VAR]],{{.*}}!llvm.mem.parallel_loop_access ![[SIMPLE_LOOP3_ID]] -// CHECK: [[GLINSTART:.+]] = load double*, double** [[GLIN_START]]{{.*}}!llvm.mem.parallel_loop_access ![[SIMPLE_LOOP3_ID]] -// CHECK-NEXT: [[IV3_1:%.+]] = load i64, i64* [[OMP_IV3]]{{.*}}!llvm.mem.parallel_loop_access ![[SIMPLE_LOOP3_ID]] +// CHECK: [[LINSTART:.+]] = load i32, i32* [[LIN_START]] +// CHECK: [[LINSTEP:.+]] = load i64, i64* [[LIN_STEP]] +// CHECK-NOT: store i32 {{.+}}, i32* [[LIN_VAR]], +// CHECK: [[GLINSTART:.+]] = load double*, double** [[GLIN_START]] +// CHECK-NEXT: [[IV3_1:%.+]] = load i64, i64* [[OMP_IV3]] // CHECK-NEXT: [[MUL:%.+]] = mul i64 [[IV3_1]], 1 // CHECK: [[GEP:%.+]] = getelementptr{{.*}}[[GLINSTART]] -// CHECK-NEXT: store double* [[GEP]], double** [[G_PTR_CUR:%[^,]+]]{{.*}}!llvm.mem.parallel_loop_access ![[SIMPLE_LOOP3_ID]] +// CHECK-NEXT: store double* [[GEP]], double** [[G_PTR_CUR:%[^,]+]] *g_ptr++ = 0.0; -// CHECK: [[GEP_VAL:%.+]] = load double{{.*}}[[G_PTR_CUR]]{{.*}}!llvm.mem.parallel_loop_access ![[SIMPLE_LOOP3_ID]] -// CHECK: store double{{.*}}[[GEP_VAL]]{{.*}}!llvm.mem.parallel_loop_access ![[SIMPLE_LOOP3_ID]] +// CHECK: [[GEP_VAL:%.+]] = load double{{.*}}[[G_PTR_CUR]] +// CHECK: store double{{.*}}[[GEP_VAL]] a[it + lin]++; // CHECK: [[FLT_INC:%.+]] = fadd float -// CHECK-NEXT: store float [[FLT_INC]],{{.*}}!llvm.mem.parallel_loop_access ![[SIMPLE_LOOP3_ID]] -// CHECK: [[IV3_2:%.+]] = load i64, i64* [[OMP_IV3]]{{.*}}!llvm.mem.parallel_loop_access ![[SIMPLE_LOOP3_ID]] +// CHECK-NEXT: store float [[FLT_INC]], +// CHECK: [[IV3_2:%.+]] = load i64, i64* [[OMP_IV3]] // CHECK-NEXT: [[ADD3_2:%.+]] = add i64 [[IV3_2]], 1 -// CHECK-NEXT: store i64 [[ADD3_2]], i64* [[OMP_IV3]]{{.*}}!llvm.mem.parallel_loop_access ![[SIMPLE_LOOP3_ID]] +// CHECK-NEXT: store i64 [[ADD3_2]], i64* [[OMP_IV3]] } // CHECK: [[SIMPLE_LOOP3_END]] // CHECK: call void @__kmpc_for_static_fini(%ident_t* {{.+}}, i32 %{{.+}}) @@ -190,22 +190,22 @@ void simple(float *a, float *b, float *c, float *d) { // CHECK: [[LB_VAL:%.+]] = load i32, i32* [[LB]], // CHECK: store i32 [[LB_VAL]], i32* [[OMP_IV4:%[^,]+]], -// CHECK: [[IV4:%.+]] = load i32, i32* [[OMP_IV4]]{{.*}}!llvm.mem.parallel_loop_access ![[SIMPLE_LOOP4_ID:[0-9]+]] -// CHECK: [[UB_VAL:%.+]] = load i32, i32* [[UB]]{{.*}}!llvm.mem.parallel_loop_access ![[SIMPLE_LOOP4_ID]] +// CHECK: [[IV4:%.+]] = load i32, i32* [[OMP_IV4]] +// CHECK: [[UB_VAL:%.+]] = load i32, i32* [[UB]] // CHECK-NEXT: [[CMP4:%.+]] = icmp sle i32 [[IV4]], [[UB_VAL]] // CHECK-NEXT: br i1 [[CMP4]], label %[[SIMPLE_LOOP4_BODY:.+]], label %[[SIMPLE_LOOP4_END:[^,]+]] for (short it = 6; it <= 20; it-=-4) { // CHECK: [[SIMPLE_LOOP4_BODY]] // Start of body: calculate it from IV: -// CHECK: [[IV4_0:%.+]] = load i32, i32* [[OMP_IV4]]{{.*}}!llvm.mem.parallel_loop_access ![[SIMPLE_LOOP4_ID]] +// CHECK: [[IV4_0:%.+]] = load i32, i32* [[OMP_IV4]] // CHECK-NEXT: [[LC_IT_1:%.+]] = mul nsw i32 [[IV4_0]], 4 // CHECK-NEXT: [[LC_IT_2:%.+]] = add nsw i32 6, [[LC_IT_1]] // CHECK-NEXT: [[LC_IT_3:%.+]] = trunc i32 [[LC_IT_2]] to i16 -// CHECK-NEXT: store i16 [[LC_IT_3]], i16* {{.+}}, !llvm.mem.parallel_loop_access ![[SIMPLE_LOOP4_ID]] +// CHECK-NEXT: store i16 [[LC_IT_3]], i16* -// CHECK: [[IV4_2:%.+]] = load i32, i32* [[OMP_IV4]]{{.*}}!llvm.mem.parallel_loop_access ![[SIMPLE_LOOP4_ID]] +// CHECK: [[IV4_2:%.+]] = load i32, i32* [[OMP_IV4]] // CHECK-NEXT: [[ADD4_2:%.+]] = add nsw i32 [[IV4_2]], 1 -// CHECK-NEXT: store i32 [[ADD4_2]], i32* [[OMP_IV4]]{{.*}}!llvm.mem.parallel_loop_access ![[SIMPLE_LOOP4_ID]] +// CHECK-NEXT: store i32 [[ADD4_2]], i32* [[OMP_IV4]] } // CHECK: [[SIMPLE_LOOP4_END]] // CHECK: call void @__kmpc_for_static_fini(%ident_t* {{.+}}, i32 %{{.+}}) @@ -227,22 +227,22 @@ void simple(float *a, float *b, float *c, float *d) { // CHECK: [[LB_VAL:%.+]] = load i32, i32* [[LB]], // CHECK: store i32 [[LB_VAL]], i32* [[OMP_IV5:%[^,]+]], -// CHECK: [[IV5:%.+]] = load i32, i32* [[OMP_IV5]]{{.*}}!llvm.mem.parallel_loop_access ![[SIMPLE_LOOP5_ID:[0-9]+]] -// CHECK: [[UB_VAL:%.+]] = load i32, i32* [[UB]]{{.*}}!llvm.mem.parallel_loop_access ![[SIMPLE_LOOP5_ID]] +// CHECK: [[IV5:%.+]] = load i32, i32* [[OMP_IV5]] +// CHECK: [[UB_VAL:%.+]] = load i32, i32* [[UB]] // CHECK-NEXT: [[CMP5:%.+]] = icmp sle i32 [[IV5]], [[UB_VAL]] // CHECK-NEXT: br i1 [[CMP5]], label %[[SIMPLE_LOOP5_BODY:.+]], label %[[SIMPLE_LOOP5_END:[^,]+]] for (unsigned char it = 'z'; it >= 'a'; it+=-1) { // CHECK: [[SIMPLE_LOOP5_BODY]] // Start of body: calculate it from IV: -// CHECK: [[IV5_0:%.+]] = load i32, i32* [[OMP_IV5]]{{.*}}!llvm.mem.parallel_loop_access ![[SIMPLE_LOOP5_ID]] +// CHECK: [[IV5_0:%.+]] = load i32, i32* [[OMP_IV5]] // CHECK-NEXT: [[IV5_1:%.+]] = mul nsw i32 [[IV5_0]], 1 // CHECK-NEXT: [[LC_IT_1:%.+]] = sub nsw i32 122, [[IV5_1]] // CHECK-NEXT: [[LC_IT_2:%.+]] = trunc i32 [[LC_IT_1]] to i8 -// CHECK-NEXT: store i8 [[LC_IT_2]], i8* {{.+}}, !llvm.mem.parallel_loop_access ![[SIMPLE_LOOP5_ID]] +// CHECK-NEXT: store i8 [[LC_IT_2]], i8* -// CHECK: [[IV5_2:%.+]] = load i32, i32* [[OMP_IV5]]{{.*}}!llvm.mem.parallel_loop_access ![[SIMPLE_LOOP5_ID]] +// CHECK: [[IV5_2:%.+]] = load i32, i32* [[OMP_IV5]] // CHECK-NEXT: [[ADD5_2:%.+]] = add nsw i32 [[IV5_2]], 1 -// CHECK-NEXT: store i32 [[ADD5_2]], i32* [[OMP_IV5]]{{.*}}!llvm.mem.parallel_loop_access ![[SIMPLE_LOOP5_ID]] +// CHECK-NEXT: store i32 [[ADD5_2]], i32* [[OMP_IV5]] } // CHECK: [[SIMPLE_LOOP5_END]] // CHECK: call void @__kmpc_for_static_fini(%ident_t* {{.+}}, i32 %{{.+}}) @@ -276,24 +276,24 @@ void simple(float *a, float *b, float *c, float *d) { // CHECK: br label %[[SIMD_LOOP7_COND:[^,]+]] // CHECK: [[SIMD_LOOP7_COND]] -// CHECK-NEXT: [[IV7:%.+]] = load i64, i64* [[OMP_IV7]]{{.*}}!llvm.mem.parallel_loop_access ![[SIMPLE_LOOP7_ID:[0-9]+]] -// CHECK-NEXT: [[UB_VAL:%.+]] = load i64, i64* [[UB]]{{.*}}!llvm.mem.parallel_loop_access ![[SIMPLE_LOOP7_ID]] +// CHECK-NEXT: [[IV7:%.+]] = load i64, i64* [[OMP_IV7]] +// CHECK-NEXT: [[UB_VAL:%.+]] = load i64, i64* [[UB]] // CHECK-NEXT: [[CMP7:%.+]] = icmp sle i64 [[IV7]], [[UB_VAL]] // CHECK-NEXT: br i1 [[CMP7]], label %[[SIMPLE_LOOP7_BODY:.+]], label %[[SIMPLE_LOOP7_END:[^,]+]] for (long long i = -10; i < 10; i += 3) { // CHECK: [[SIMPLE_LOOP7_BODY]] // Start of body: calculate i from IV: -// CHECK: [[IV7_0:%.+]] = load i64, i64* [[OMP_IV7]]{{.*}}!llvm.mem.parallel_loop_access ![[SIMPLE_LOOP7_ID]] +// CHECK: [[IV7_0:%.+]] = load i64, i64* [[OMP_IV7]] // CHECK-NEXT: [[LC_IT_1:%.+]] = mul nsw i64 [[IV7_0]], 3 // CHECK-NEXT: [[LC_IT_2:%.+]] = add nsw i64 -10, [[LC_IT_1]] -// CHECK-NEXT: store i64 [[LC_IT_2]], i64* [[LC:%[^,]+]],{{.+}}!llvm.mem.parallel_loop_access ![[SIMPLE_LOOP7_ID]] -// CHECK-NEXT: [[LC_VAL:%.+]] = load i64, i64* [[LC]]{{.+}}!llvm.mem.parallel_loop_access ![[SIMPLE_LOOP7_ID]] +// CHECK-NEXT: store i64 [[LC_IT_2]], i64* [[LC:%[^,]+]], +// CHECK-NEXT: [[LC_VAL:%.+]] = load i64, i64* [[LC]] // CHECK-NEXT: [[CONV:%.+]] = trunc i64 [[LC_VAL]] to i32 -// CHECK-NEXT: store i32 [[CONV]], i32* [[A_PRIV:%[^,]+]],{{.+}}!llvm.mem.parallel_loop_access ![[SIMPLE_LOOP7_ID]] +// CHECK-NEXT: store i32 [[CONV]], i32* [[A_PRIV:%[^,]+]], A = i; -// CHECK: [[IV7_2:%.+]] = load i64, i64* [[OMP_IV7]]{{.*}}!llvm.mem.parallel_loop_access ![[SIMPLE_LOOP7_ID]] +// CHECK: [[IV7_2:%.+]] = load i64, i64* [[OMP_IV7]] // CHECK-NEXT: [[ADD7_2:%.+]] = add nsw i64 [[IV7_2]], 1 -// CHECK-NEXT: store i64 [[ADD7_2]], i64* [[OMP_IV7]]{{.*}}!llvm.mem.parallel_loop_access ![[SIMPLE_LOOP7_ID]] +// CHECK-NEXT: store i64 [[ADD7_2]], i64* [[OMP_IV7]] } // CHECK: [[SIMPLE_LOOP7_END]] // CHECK: call void @__kmpc_for_static_fini(%ident_t* {{.+}}, i32 %{{.+}}) @@ -329,23 +329,23 @@ void simple(float *a, float *b, float *c, float *d) { // CHECK: br label %[[SIMD_LOOP8_COND:[^,]+]] // CHECK: [[SIMD_LOOP8_COND]] -// CHECK-NEXT: [[IV8:%.+]] = load i64, i64* [[OMP_IV8]]{{.*}}!llvm.mem.parallel_loop_access ![[SIMPLE_LOOP8_ID:[0-9]+]] -// CHECK-NEXT: [[UB_VAL:%.+]] = load i64, i64* [[UB]]{{.*}}!llvm.mem.parallel_loop_access ![[SIMPLE_LOOP8_ID]] +// CHECK-NEXT: [[IV8:%.+]] = load i64, i64* [[OMP_IV8]] +// CHECK-NEXT: [[UB_VAL:%.+]] = load i64, i64* [[UB]] // CHECK-NEXT: [[CMP8:%.+]] = icmp sle i64 [[IV8]], [[UB_VAL]] // CHECK-NEXT: br i1 [[CMP8]], label %[[SIMPLE_LOOP8_BODY:.+]], label %[[SIMPLE_LOOP8_END:[^,]+]] for (long long i = -10; i < 10; i += 3) { // CHECK: [[SIMPLE_LOOP8_BODY]] // Start of body: calculate i from IV: -// CHECK: [[IV8_0:%.+]] = load i64, i64* [[OMP_IV8]]{{.*}}!llvm.mem.parallel_loop_access ![[SIMPLE_LOOP8_ID]] +// CHECK: [[IV8_0:%.+]] = load i64, i64* [[OMP_IV8]] // CHECK-NEXT: [[LC_IT_1:%.+]] = mul nsw i64 [[IV8_0]], 3 // CHECK-NEXT: [[LC_IT_2:%.+]] = add nsw i64 -10, [[LC_IT_1]] -// CHECK-NEXT: store i64 [[LC_IT_2]], i64* [[LC:%[^,]+]],{{.+}}!llvm.mem.parallel_loop_access ![[SIMPLE_LOOP8_ID]] -// CHECK-NEXT: [[LC_VAL:%.+]] = load i64, i64* [[LC]]{{.+}}!llvm.mem.parallel_loop_access ![[SIMPLE_LOOP8_ID]] -// CHECK: store i32 %{{.+}}, i32* [[R_PRIV]],{{.+}}!llvm.mem.parallel_loop_access ![[SIMPLE_LOOP8_ID]] +// CHECK-NEXT: store i64 [[LC_IT_2]], i64* [[LC:%[^,]+]], +// CHECK-NEXT: [[LC_VAL:%.+]] = load i64, i64* [[LC]] +// CHECK: store i32 %{{.+}}, i32* [[R_PRIV]], R *= i; -// CHECK: [[IV8_2:%.+]] = load i64, i64* [[OMP_IV8]]{{.*}}!llvm.mem.parallel_loop_access ![[SIMPLE_LOOP8_ID]] +// CHECK: [[IV8_2:%.+]] = load i64, i64* [[OMP_IV8]] // CHECK-NEXT: [[ADD8_2:%.+]] = add nsw i64 [[IV8_2]], 1 -// CHECK-NEXT: store i64 [[ADD8_2]], i64* [[OMP_IV8]]{{.*}}!llvm.mem.parallel_loop_access ![[SIMPLE_LOOP8_ID]] +// CHECK-NEXT: store i64 [[ADD8_2]], i64* [[OMP_IV8]] } // CHECK: [[SIMPLE_LOOP8_END]] // CHECK: call void @__kmpc_for_static_fini(%ident_t* {{.+}}, i32 %{{.+}}) @@ -389,27 +389,27 @@ int templ1(T a, T *z) { // CHECK: store i64 [[LB_VAL]], i64* [[T1_OMP_IV:%[^,]+]], // ... -// CHECK: [[IV:%.+]] = load i64, i64* [[T1_OMP_IV]]{{.*}}!llvm.mem.parallel_loop_access ![[T1_ID:[0-9]+]] -// CHECK-NEXT: [[UB_VAL:%.+]] = load i64, i64* [[UB]]{{.*}}!llvm.mem.parallel_loop_access ![[T1_ID]] +// CHECK: [[IV:%.+]] = load i64, i64* [[T1_OMP_IV]] +// CHECK-NEXT: [[UB_VAL:%.+]] = load i64, i64* [[UB]] // CHECK-NEXT: [[CMP1:%.+]] = icmp sle i64 [[IV]], [[UB_VAL]] // CHECK-NEXT: br i1 [[CMP1]], label %[[T1_BODY:.+]], label %[[T1_END:[^,]+]] // CHECK: [[T1_BODY]] // Loop counters i and j updates: -// CHECK: [[IV1:%.+]] = load i64, i64* [[T1_OMP_IV]]{{.*}}!llvm.mem.parallel_loop_access ![[T1_ID]] +// CHECK: [[IV1:%.+]] = load i64, i64* [[T1_OMP_IV]] // CHECK-NEXT: [[I_1:%.+]] = sdiv i64 [[IV1]], 4 // CHECK-NEXT: [[I_1_MUL1:%.+]] = mul nsw i64 [[I_1]], 1 // CHECK-NEXT: [[I_1_ADD0:%.+]] = add nsw i64 0, [[I_1_MUL1]] // CHECK-NEXT: [[I_2:%.+]] = trunc i64 [[I_1_ADD0]] to i32 -// CHECK-NEXT: store i32 [[I_2]], i32* {{%.+}}{{.*}}!llvm.mem.parallel_loop_access ![[T1_ID]] -// CHECK: [[IV2:%.+]] = load i64, i64* [[T1_OMP_IV]]{{.*}}!llvm.mem.parallel_loop_access ![[T1_ID]] +// CHECK-NEXT: store i32 [[I_2]], i32* +// CHECK: [[IV2:%.+]] = load i64, i64* [[T1_OMP_IV]] // CHECK-NEXT: [[J_1:%.+]] = srem i64 [[IV2]], 4 // CHECK-NEXT: [[J_2:%.+]] = mul nsw i64 [[J_1]], 2 // CHECK-NEXT: [[J_2_ADD0:%.+]] = add nsw i64 0, [[J_2]] -// CHECK-NEXT: store i64 [[J_2_ADD0]], i64* {{%.+}}{{.*}}!llvm.mem.parallel_loop_access ![[T1_ID]] +// CHECK-NEXT: store i64 [[J_2_ADD0]], i64* // simd.for.inc: -// CHECK: [[IV3:%.+]] = load i64, i64* [[T1_OMP_IV]]{{.*}}!llvm.mem.parallel_loop_access ![[T1_ID]] +// CHECK: [[IV3:%.+]] = load i64, i64* [[T1_OMP_IV]] // CHECK-NEXT: [[INC:%.+]] = add nsw i64 [[IV3]], 1 -// CHECK-NEXT: store i64 [[INC]], i64* [[T1_OMP_IV]]{{.*}}!llvm.mem.parallel_loop_access ![[T1_ID]] +// CHECK-NEXT: store i64 [[INC]], i64* [[T1_OMP_IV]] // CHECK-NEXT: br label {{%.+}} // CHECK: [[T1_END]] // CHECK: call void @__kmpc_for_static_fini(%ident_t* {{.+}}, i32 %{{.+}}) @@ -480,14 +480,14 @@ void iter_simple(IterDouble ia, IterDouble ib, IterDouble ic) { // CHECK: [[LB_VAL:%.+]] = load i32, i32* [[LB]], // CHECK: store i32 [[LB_VAL]], i32* [[IT_OMP_IV:%[^,]+]], -// CHECK: [[IV:%.+]] = load i32, i32* [[IT_OMP_IV]]{{.+}} !llvm.mem.parallel_loop_access ![[ITER_LOOP_ID:[0-9]+]] -// CHECK-NEXT: [[UB_VAL:%.+]] = load i32, i32* [[UB]]{{.*}}!llvm.mem.parallel_loop_access ![[ITER_LOOP_ID]] +// CHECK: [[IV:%.+]] = load i32, i32* [[IT_OMP_IV]] +// CHECK-NEXT: [[UB_VAL:%.+]] = load i32, i32* [[UB]] // CHECK-NEXT: [[CMP:%.+]] = icmp sle i32 [[IV]], [[UB_VAL]] // CHECK-NEXT: br i1 [[CMP]], label %[[IT_BODY:[^,]+]], label %[[IT_END:[^,]+]] for (IterDouble i = ia; i < ib; ++i) { // CHECK: [[IT_BODY]] // Start of body: calculate i from index: -// CHECK: [[IV1:%.+]] = load i32, i32* [[IT_OMP_IV]]{{.+}}!llvm.mem.parallel_loop_access ![[ITER_LOOP_ID]] +// CHECK: [[IV1:%.+]] = load i32, i32* [[IT_OMP_IV]] // Call of operator+ (i, IV). // CHECK: {{%.+}} = invoke {{.+}} @{{.*}}IterDouble{{.*}} // ... loop body ... @@ -495,12 +495,12 @@ void iter_simple(IterDouble ia, IterDouble ib, IterDouble ic) { // Float multiply and save result. // CHECK: [[MULR:%.+]] = fmul double {{%.+}}, 5.000000e-01 // CHECK-NEXT: invoke {{.+}} @{{.*}}IterDouble{{.*}} -// CHECK: store double [[MULR:%.+]], double* [[RESULT_ADDR:%.+]], !llvm.mem.parallel_loop_access ![[ITER_LOOP_ID]] +// CHECK: store double [[MULR:%.+]], double* [[RESULT_ADDR:%.+]] ++ic; // -// CHECK: [[IV2:%.+]] = load i32, i32* [[IT_OMP_IV]]{{.+}}!llvm.mem.parallel_loop_access ![[ITER_LOOP_ID]] +// CHECK: [[IV2:%.+]] = load i32, i32* [[IT_OMP_IV]] // CHECK-NEXT: [[ADD2:%.+]] = add nsw i32 [[IV2]], 1 -// CHECK-NEXT: store i32 [[ADD2]], i32* [[IT_OMP_IV]]{{.+}}!llvm.mem.parallel_loop_access ![[ITER_LOOP_ID]] +// CHECK-NEXT: store i32 [[ADD2]], i32* [[IT_OMP_IV]] // br label %{{.*}}, !llvm.loop ![[ITER_LOOP_ID]] } // CHECK: [[IT_END]] @@ -533,8 +533,8 @@ void collapsed(float *a, float *b, float *c, float *d) { // #pragma omp for simd collapse(4) -// CHECK: [[IV:%.+]] = load i32, i32* [[OMP_IV]]{{.+}}!llvm.mem.parallel_loop_access ![[COLL1_LOOP_ID:[0-9]+]] -// CHECK: [[UB_VAL:%.+]] = load i32, i32* [[UB]]{{.*}}!llvm.mem.parallel_loop_access ![[COLL1_LOOP_ID]] +// CHECK: [[IV:%.+]] = load i32, i32* [[OMP_IV]] +// CHECK: [[UB_VAL:%.+]] = load i32, i32* [[UB]] // CHECK-NEXT: [[CMP:%.+]] = icmp ule i32 [[IV]], [[UB_VAL]] // CHECK-NEXT: br i1 [[CMP]], label %[[COLL1_BODY:[^,]+]], label %[[COLL1_END:[^,]+]] for (i = 1; i < 3; i++) // 2 iterations @@ -544,25 +544,25 @@ void collapsed(float *a, float *b, float *c, float *d) { { // CHECK: [[COLL1_BODY]] // Start of body: calculate i from index: -// CHECK: [[IV1:%.+]] = load i32, i32* [[OMP_IV]]{{.+}}!llvm.mem.parallel_loop_access ![[COLL1_LOOP_ID]] +// CHECK: [[IV1:%.+]] = load i32, i32* [[OMP_IV]] // Calculation of the loop counters values. // CHECK: [[CALC_I_1:%.+]] = udiv i32 [[IV1]], 60 // CHECK-NEXT: [[CALC_I_1_MUL1:%.+]] = mul i32 [[CALC_I_1]], 1 // CHECK-NEXT: [[CALC_I_2:%.+]] = add i32 1, [[CALC_I_1_MUL1]] // CHECK-NEXT: store i32 [[CALC_I_2]], i32* [[LC_I:.+]] -// CHECK: [[IV1_2:%.+]] = load i32, i32* [[OMP_IV]]{{.+}}!llvm.mem.parallel_loop_access ![[COLL1_LOOP_ID]] +// CHECK: [[IV1_2:%.+]] = load i32, i32* [[OMP_IV]] // CHECK-NEXT: [[CALC_J_1:%.+]] = udiv i32 [[IV1_2]], 20 // CHECK-NEXT: [[CALC_J_2:%.+]] = urem i32 [[CALC_J_1]], 3 // CHECK-NEXT: [[CALC_J_2_MUL1:%.+]] = mul i32 [[CALC_J_2]], 1 // CHECK-NEXT: [[CALC_J_3:%.+]] = add i32 2, [[CALC_J_2_MUL1]] // CHECK-NEXT: store i32 [[CALC_J_3]], i32* [[LC_J:.+]] -// CHECK: [[IV1_3:%.+]] = load i32, i32* [[OMP_IV]]{{.+}}!llvm.mem.parallel_loop_access ![[COLL1_LOOP_ID]] +// CHECK: [[IV1_3:%.+]] = load i32, i32* [[OMP_IV]] // CHECK-NEXT: [[CALC_K_1:%.+]] = udiv i32 [[IV1_3]], 5 // CHECK-NEXT: [[CALC_K_2:%.+]] = urem i32 [[CALC_K_1]], 4 // CHECK-NEXT: [[CALC_K_2_MUL1:%.+]] = mul i32 [[CALC_K_2]], 1 // CHECK-NEXT: [[CALC_K_3:%.+]] = add i32 3, [[CALC_K_2_MUL1]] // CHECK-NEXT: store i32 [[CALC_K_3]], i32* [[LC_K:.+]] -// CHECK: [[IV1_4:%.+]] = load i32, i32* [[OMP_IV]]{{.+}}!llvm.mem.parallel_loop_access ![[COLL1_LOOP_ID]] +// CHECK: [[IV1_4:%.+]] = load i32, i32* [[OMP_IV]] // CHECK-NEXT: [[CALC_L_1:%.+]] = urem i32 [[IV1_4]], 5 // CHECK-NEXT: [[CALC_L_1_MUL1:%.+]] = mul i32 [[CALC_L_1]], 1 // CHECK-NEXT: [[CALC_L_2:%.+]] = add i32 4, [[CALC_L_1_MUL1]] @@ -570,12 +570,12 @@ void collapsed(float *a, float *b, float *c, float *d) { // CHECK-NEXT: store i16 [[CALC_L_3]], i16* [[LC_L:.+]] // ... loop body ... // End of body: store into a[i]: -// CHECK: store float [[RESULT:%.+]], float* [[RESULT_ADDR:%.+]]{{.+}}!llvm.mem.parallel_loop_access ![[COLL1_LOOP_ID]] +// CHECK: store float [[RESULT:%.+]], float* [[RESULT_ADDR:%.+]] float res = b[j] * c[k]; a[i] = res * d[l]; -// CHECK: [[IV2:%.+]] = load i32, i32* [[OMP_IV]]{{.*}}!llvm.mem.parallel_loop_access ![[COLL1_LOOP_ID]] +// CHECK: [[IV2:%.+]] = load i32, i32* [[OMP_IV]] // CHECK-NEXT: [[ADD2:%.+]] = add i32 [[IV2]], 1 -// CHECK-NEXT: store i32 [[ADD2]], i32* [[OMP_IV]]{{.*}}!llvm.mem.parallel_loop_access ![[COLL1_LOOP_ID]] +// CHECK-NEXT: store i32 [[ADD2]], i32* [[OMP_IV]] // br label %{{[^,]+}}, !llvm.loop ![[COLL1_LOOP_ID]] // CHECK: [[COLL1_END]] } @@ -621,8 +621,8 @@ void widened(float *a, float *b, float *c, float *d) { // #pragma omp for simd collapse(2) private(globalfloat, localint) -// CHECK: [[IV:%.+]] = load i64, i64* [[OMP_IV]]{{.+}}!llvm.mem.parallel_loop_access ![[WIDE1_LOOP_ID:[0-9]+]] -// CHECK: [[UB_VAL:%.+]] = load i64, i64* [[UB]]{{.*}}!llvm.mem.parallel_loop_access ![[WIDE1_LOOP_ID]] +// CHECK: [[IV:%.+]] = load i64, i64* [[OMP_IV]] +// CHECK: [[UB_VAL:%.+]] = load i64, i64* [[UB]] // CHECK-NEXT: [[CMP:%.+]] = icmp sle i64 [[IV]], [[UB_VAL]] // CHECK-NEXT: br i1 [[CMP]], label %[[WIDE1_BODY:[^,]+]], label %[[WIDE1_END:[^,]+]] for (i = 1; i < 3; i++) // 2 iterations @@ -630,10 +630,10 @@ void widened(float *a, float *b, float *c, float *d) { { // CHECK: [[WIDE1_BODY]] // Start of body: calculate i from index: -// CHECK: [[IV1:%.+]] = load i64, i64* [[OMP_IV]]{{.+}}!llvm.mem.parallel_loop_access ![[WIDE1_LOOP_ID]] +// CHECK: [[IV1:%.+]] = load i64, i64* [[OMP_IV]] // Calculation of the loop counters values... // CHECK: store i32 {{[^,]+}}, i32* [[LC_I:.+]] -// CHECK: [[IV1_2:%.+]] = load i64, i64* [[OMP_IV]]{{.+}}!llvm.mem.parallel_loop_access ![[WIDE1_LOOP_ID]] +// CHECK: [[IV1_2:%.+]] = load i64, i64* [[OMP_IV]] // CHECK: store i16 {{[^,]+}}, i16* [[LC_J:.+]] // ... loop body ... // @@ -642,14 +642,14 @@ void widened(float *a, float *b, float *c, float *d) { globalfloat = (float)j/i; float res = b[j] * c[j]; // Store into a[i]: -// CHECK: store float [[RESULT:%.+]], float* [[RESULT_ADDR:%.+]]{{.+}}!llvm.mem.parallel_loop_access ![[WIDE1_LOOP_ID]] +// CHECK: store float [[RESULT:%.+]], float* [[RESULT_ADDR:%.+]] a[i] = res * d[i]; // Then there's a store into private var localint: -// CHECK: store i32 {{.+}}, i32* [[LOCALINT:%[^,]+]]{{.+}}!llvm.mem.parallel_loop_access ![[WIDE1_LOOP_ID]] +// CHECK: store i32 {{.+}}, i32* [[LOCALINT:%[^,]+]] localint = (int)j; -// CHECK: [[IV2:%.+]] = load i64, i64* [[OMP_IV]]{{.*}}!llvm.mem.parallel_loop_access ![[WIDE1_LOOP_ID]] +// CHECK: [[IV2:%.+]] = load i64, i64* [[OMP_IV]] // CHECK-NEXT: [[ADD2:%.+]] = add nsw i64 [[IV2]], 1 -// CHECK-NEXT: store i64 [[ADD2]], i64* [[OMP_IV]]{{.*}}!llvm.mem.parallel_loop_access ![[WIDE1_LOOP_ID]] +// CHECK-NEXT: store i64 [[ADD2]], i64* [[OMP_IV]] // // br label %{{[^,]+}}, !llvm.loop ![[WIDE1_LOOP_ID]] // CHECK: [[WIDE1_END]] diff --git a/test/OpenMP/ordered_codegen.cpp b/test/OpenMP/ordered_codegen.cpp index 6448ddf083..daa6211b4c 100644 --- a/test/OpenMP/ordered_codegen.cpp +++ b/test/OpenMP/ordered_codegen.cpp @@ -224,8 +224,8 @@ void foo_simd(int low, int up) { #pragma omp ordered simd f[i] = 1.0; } - // CHECK: store float 0.000000e+00, float* %{{.+}}, align {{[0-9]+}}, !llvm.mem.parallel_loop_access ! - // CHECK-NEXT: call void [[CAP_FUNC:@.+]](i32* %{{.+}}) #{{[0-9]+}}, !llvm.mem.parallel_loop_access ! + // CHECK: store float 0.000000e+00, float* %{{.+}}, align {{[0-9]+}} + // CHECK-NEXT: call void [[CAP_FUNC:@.+]](i32* %{{.+}}) #{{[0-9]+}} #pragma omp for simd ordered for (int i = low; i < up; ++i) { f[i] = 0.0; diff --git a/test/OpenMP/parallel_for_simd_codegen.cpp b/test/OpenMP/parallel_for_simd_codegen.cpp index 53f18d09b7..9a6ad2f06f 100644 --- a/test/OpenMP/parallel_for_simd_codegen.cpp +++ b/test/OpenMP/parallel_for_simd_codegen.cpp @@ -41,24 +41,24 @@ void simple(float *a, float *b, float *c, float *d) { // CHECK: [[LB_VAL:%.+]] = load i32, i32* [[LB]], // CHECK: store i32 [[LB_VAL]], i32* [[OMP_IV:%[^,]+]], -// CHECK: [[IV:%.+]] = load i32, i32* [[OMP_IV]]{{.*}}!llvm.mem.parallel_loop_access ![[SIMPLE_LOOP1_ID:[0-9]+]] -// CHECK: [[UB_VAL:%.+]] = load i32, i32* [[UB]]{{.*}}!llvm.mem.parallel_loop_access ![[SIMPLE_LOOP1_ID]] +// CHECK: [[IV:%.+]] = load i32, i32* [[OMP_IV]] +// CHECK: [[UB_VAL:%.+]] = load i32, i32* [[UB]] // CHECK-NEXT: [[CMP:%.+]] = icmp sle i32 [[IV]], [[UB_VAL]] // CHECK-NEXT: br i1 [[CMP]], label %[[SIMPLE_LOOP1_BODY:.+]], label %[[SIMPLE_LOOP1_END:[^,]+]] for (int i = 3; i < 32; i += 5) { // CHECK: [[SIMPLE_LOOP1_BODY]] // Start of body: calculate i from IV: -// CHECK: [[IV1_1:%.+]] = load i32, i32* [[OMP_IV]]{{.*}}!llvm.mem.parallel_loop_access ![[SIMPLE_LOOP1_ID]] +// CHECK: [[IV1_1:%.+]] = load i32, i32* [[OMP_IV]] // CHECK: [[CALC_I_1:%.+]] = mul nsw i32 [[IV1_1]], 5 // CHECK-NEXT: [[CALC_I_2:%.+]] = add nsw i32 3, [[CALC_I_1]] -// CHECK-NEXT: store i32 [[CALC_I_2]], i32* [[LC_I:.+]]{{.*}}!llvm.mem.parallel_loop_access ![[SIMPLE_LOOP1_ID]] +// CHECK-NEXT: store i32 [[CALC_I_2]], i32* [[LC_I:.+]] // ... loop body ... // End of body: store into a[i]: -// CHECK: store float [[RESULT:%.+]], float* {{%.+}}{{.*}}!llvm.mem.parallel_loop_access ![[SIMPLE_LOOP1_ID]] +// CHECK: store float [[RESULT:%.+]], float* a[i] = b[i] * c[i] * d[i]; -// CHECK: [[IV1_2:%.+]] = load i32, i32* [[OMP_IV]]{{.*}}!llvm.mem.parallel_loop_access ![[SIMPLE_LOOP1_ID]] +// CHECK: [[IV1_2:%.+]] = load i32, i32* [[OMP_IV]] // CHECK-NEXT: [[ADD1_2:%.+]] = add nsw i32 [[IV1_2]], 1 -// CHECK-NEXT: store i32 [[ADD1_2]], i32* [[OMP_IV]]{{.*}}!llvm.mem.parallel_loop_access ![[SIMPLE_LOOP1_ID]] +// CHECK-NEXT: store i32 [[ADD1_2]], i32* [[OMP_IV]] // br label %{{.+}}, !llvm.loop !{{.+}} } // CHECK: [[SIMPLE_LOOP1_END]] @@ -141,36 +141,36 @@ void simple(float *a, float *b, float *c, float *d) { // CHECK: [[LB_VAL:%.+]] = load i64, i64* [[LB]], // CHECK: store i64 [[LB_VAL]], i64* [[OMP_IV3:%[^,]+]], -// CHECK: [[IV3:%.+]] = load i64, i64* [[OMP_IV3]]{{.*}}!llvm.mem.parallel_loop_access ![[SIMPLE_LOOP3_ID:[0-9]+]] -// CHECK: [[UB_VAL:%.+]] = load i64, i64* [[UB]]{{.*}}!llvm.mem.parallel_loop_access ![[SIMPLE_LOOP3_ID]] +// CHECK: [[IV3:%.+]] = load i64, i64* [[OMP_IV3]] +// CHECK: [[UB_VAL:%.+]] = load i64, i64* [[UB]] // CHECK-NEXT: [[CMP3:%.+]] = icmp ule i64 [[IV3]], [[UB_VAL]] // CHECK-NEXT: br i1 [[CMP3]], label %[[SIMPLE_LOOP3_BODY:.+]], label %[[SIMPLE_LOOP3_END:[^,]+]] for (unsigned long long it = 2000; it >= 600; it-=400) { // CHECK: [[SIMPLE_LOOP3_BODY]] // Start of body: calculate it from IV: -// CHECK: [[IV3_0:%.+]] = load i64, i64* [[OMP_IV3]]{{.*}}!llvm.mem.parallel_loop_access ![[SIMPLE_LOOP3_ID]] +// CHECK: [[IV3_0:%.+]] = load i64, i64* [[OMP_IV3]] // CHECK-NEXT: [[LC_IT_1:%.+]] = mul i64 [[IV3_0]], 400 // CHECK-NEXT: [[LC_IT_2:%.+]] = sub i64 2000, [[LC_IT_1]] -// CHECK-NEXT: store i64 [[LC_IT_2]], i64* {{.+}}, !llvm.mem.parallel_loop_access ![[SIMPLE_LOOP3_ID]] +// CHECK-NEXT: store i64 [[LC_IT_2]], i64* {{.+}} // // Linear start and step are used to calculate current value of the linear variable. -// CHECK: [[LINSTART:.+]] = load i32, i32* [[LIN_START]]{{.*}}!llvm.mem.parallel_loop_access ![[SIMPLE_LOOP3_ID]] -// CHECK: [[LINSTEP:.+]] = load i64, i64* [[LIN_STEP]]{{.*}}!llvm.mem.parallel_loop_access ![[SIMPLE_LOOP3_ID]] -// CHECK-NOT: store i32 {{.+}}, i32* [[LIN_VAR]],{{.*}}!llvm.mem.parallel_loop_access ![[SIMPLE_LOOP3_ID]] -// CHECK: [[GLINSTART:.+]] = load double*, double** [[GLIN_START]]{{.*}}!llvm.mem.parallel_loop_access ![[SIMPLE_LOOP3_ID]] -// CHECK-NEXT: [[IV3_1:%.+]] = load i64, i64* [[OMP_IV3]]{{.*}}!llvm.mem.parallel_loop_access ![[SIMPLE_LOOP3_ID]] +// CHECK: [[LINSTART:.+]] = load i32, i32* [[LIN_START]] +// CHECK: [[LINSTEP:.+]] = load i64, i64* [[LIN_STEP]] +// CHECK-NOT: store i32 {{.+}}, i32* [[LIN_VAR]] +// CHECK: [[GLINSTART:.+]] = load double*, double** [[GLIN_START]] +// CHECK-NEXT: [[IV3_1:%.+]] = load i64, i64* [[OMP_IV3]] // CHECK-NEXT: [[MUL:%.+]] = mul i64 [[IV3_1]], 1 // CHECK: [[GEP:%.+]] = getelementptr{{.*}}[[GLINSTART]] -// CHECK-NEXT: store double* [[GEP]], double** [[G_PTR_CUR:%[^,]+]]{{.*}}!llvm.mem.parallel_loop_access ![[SIMPLE_LOOP3_ID]] +// CHECK-NEXT: store double* [[GEP]], double** [[G_PTR_CUR:%[^,]+]] *g_ptr++ = 0.0; -// CHECK: [[GEP_VAL:%.+]] = load double{{.*}}[[G_PTR_CUR]]{{.*}}!llvm.mem.parallel_loop_access ![[SIMPLE_LOOP3_ID]] -// CHECK: store double{{.*}}[[GEP_VAL]]{{.*}}!llvm.mem.parallel_loop_access ![[SIMPLE_LOOP3_ID]] +// CHECK: [[GEP_VAL:%.+]] = load double{{.*}}[[G_PTR_CUR]] +// CHECK: store double{{.*}}[[GEP_VAL]] a[it + lin]++; // CHECK: [[FLT_INC:%.+]] = fadd float -// CHECK-NEXT: store float [[FLT_INC]],{{.*}}!llvm.mem.parallel_loop_access ![[SIMPLE_LOOP3_ID]] -// CHECK: [[IV3_2:%.+]] = load i64, i64* [[OMP_IV3]]{{.*}}!llvm.mem.parallel_loop_access ![[SIMPLE_LOOP3_ID]] +// CHECK-NEXT: store float [[FLT_INC]], +// CHECK: [[IV3_2:%.+]] = load i64, i64* [[OMP_IV3]] // CHECK-NEXT: [[ADD3_2:%.+]] = add i64 [[IV3_2]], 1 -// CHECK-NEXT: store i64 [[ADD3_2]], i64* [[OMP_IV3]]{{.*}}!llvm.mem.parallel_loop_access ![[SIMPLE_LOOP3_ID]] +// CHECK-NEXT: store i64 [[ADD3_2]], i64* [[OMP_IV3]] } // CHECK: [[SIMPLE_LOOP3_END]] // CHECK: call void @__kmpc_for_static_fini(%ident_t* {{.+}}, i32 %{{.+}}) @@ -198,22 +198,22 @@ void simple(float *a, float *b, float *c, float *d) { // CHECK: [[LB_VAL:%.+]] = load i32, i32* [[LB]], // CHECK: store i32 [[LB_VAL]], i32* [[OMP_IV4:%[^,]+]], -// CHECK: [[IV4:%.+]] = load i32, i32* [[OMP_IV4]]{{.*}}!llvm.mem.parallel_loop_access ![[SIMPLE_LOOP4_ID:[0-9]+]] -// CHECK: [[UB_VAL:%.+]] = load i32, i32* [[UB]]{{.*}}!llvm.mem.parallel_loop_access ![[SIMPLE_LOOP4_ID]] +// CHECK: [[IV4:%.+]] = load i32, i32* [[OMP_IV4]] +// CHECK: [[UB_VAL:%.+]] = load i32, i32* [[UB]] // CHECK-NEXT: [[CMP4:%.+]] = icmp sle i32 [[IV4]], [[UB_VAL]] // CHECK-NEXT: br i1 [[CMP4]], label %[[SIMPLE_LOOP4_BODY:.+]], label %[[SIMPLE_LOOP4_END:[^,]+]] for (short it = 6; it <= 20; it-=-4) { // CHECK: [[SIMPLE_LOOP4_BODY]] // Start of body: calculate it from IV: -// CHECK: [[IV4_0:%.+]] = load i32, i32* [[OMP_IV4]]{{.*}}!llvm.mem.parallel_loop_access ![[SIMPLE_LOOP4_ID]] +// CHECK: [[IV4_0:%.+]] = load i32, i32* [[OMP_IV4]] // CHECK-NEXT: [[LC_IT_1:%.+]] = mul nsw i32 [[IV4_0]], 4 // CHECK-NEXT: [[LC_IT_2:%.+]] = add nsw i32 6, [[LC_IT_1]] // CHECK-NEXT: [[LC_IT_3:%.+]] = trunc i32 [[LC_IT_2]] to i16 -// CHECK-NEXT: store i16 [[LC_IT_3]], i16* {{.+}}, !llvm.mem.parallel_loop_access ![[SIMPLE_LOOP4_ID]] +// CHECK-NEXT: store i16 [[LC_IT_3]], i16* -// CHECK: [[IV4_2:%.+]] = load i32, i32* [[OMP_IV4]]{{.*}}!llvm.mem.parallel_loop_access ![[SIMPLE_LOOP4_ID]] +// CHECK: [[IV4_2:%.+]] = load i32, i32* [[OMP_IV4]] // CHECK-NEXT: [[ADD4_2:%.+]] = add nsw i32 [[IV4_2]], 1 -// CHECK-NEXT: store i32 [[ADD4_2]], i32* [[OMP_IV4]]{{.*}}!llvm.mem.parallel_loop_access ![[SIMPLE_LOOP4_ID]] +// CHECK-NEXT: store i32 [[ADD4_2]], i32* [[OMP_IV4]] } // CHECK: [[SIMPLE_LOOP4_END]] // CHECK: call void @__kmpc_for_static_fini(%ident_t* {{.+}}, i32 %{{.+}}) @@ -234,22 +234,22 @@ void simple(float *a, float *b, float *c, float *d) { // CHECK: [[LB_VAL:%.+]] = load i32, i32* [[LB]], // CHECK: store i32 [[LB_VAL]], i32* [[OMP_IV5:%[^,]+]], -// CHECK: [[IV5:%.+]] = load i32, i32* [[OMP_IV5]]{{.*}}!llvm.mem.parallel_loop_access ![[SIMPLE_LOOP5_ID:[0-9]+]] -// CHECK: [[UB_VAL:%.+]] = load i32, i32* [[UB]]{{.*}}!llvm.mem.parallel_loop_access ![[SIMPLE_LOOP5_ID]] +// CHECK: [[IV5:%.+]] = load i32, i32* [[OMP_IV5]] +// CHECK: [[UB_VAL:%.+]] = load i32, i32* [[UB]] // CHECK-NEXT: [[CMP5:%.+]] = icmp sle i32 [[IV5]], [[UB_VAL]] // CHECK-NEXT: br i1 [[CMP5]], label %[[SIMPLE_LOOP5_BODY:.+]], label %[[SIMPLE_LOOP5_END:[^,]+]] for (unsigned char it = 'z'; it >= 'a'; it+=-1) { // CHECK: [[SIMPLE_LOOP5_BODY]] // Start of body: calculate it from IV: -// CHECK: [[IV5_0:%.+]] = load i32, i32* [[OMP_IV5]]{{.*}}!llvm.mem.parallel_loop_access ![[SIMPLE_LOOP5_ID]] +// CHECK: [[IV5_0:%.+]] = load i32, i32* [[OMP_IV5]] // CHECK-NEXT: [[IV5_1:%.+]] = mul nsw i32 [[IV5_0]], 1 // CHECK-NEXT: [[LC_IT_1:%.+]] = sub nsw i32 122, [[IV5_1]] // CHECK-NEXT: [[LC_IT_2:%.+]] = trunc i32 [[LC_IT_1]] to i8 -// CHECK-NEXT: store i8 [[LC_IT_2]], i8* {{.+}}, !llvm.mem.parallel_loop_access ![[SIMPLE_LOOP5_ID]] +// CHECK-NEXT: store i8 [[LC_IT_2]], i8* {{.+}}, -// CHECK: [[IV5_2:%.+]] = load i32, i32* [[OMP_IV5]]{{.*}}!llvm.mem.parallel_loop_access ![[SIMPLE_LOOP5_ID]] +// CHECK: [[IV5_2:%.+]] = load i32, i32* [[OMP_IV5]] // CHECK-NEXT: [[ADD5_2:%.+]] = add nsw i32 [[IV5_2]], 1 -// CHECK-NEXT: store i32 [[ADD5_2]], i32* [[OMP_IV5]]{{.*}}!llvm.mem.parallel_loop_access ![[SIMPLE_LOOP5_ID]] +// CHECK-NEXT: store i32 [[ADD5_2]], i32* [[OMP_IV5]] } // CHECK: [[SIMPLE_LOOP5_END]] // CHECK: call void @__kmpc_for_static_fini(%ident_t* {{.+}}, i32 %{{.+}}) @@ -280,24 +280,24 @@ void simple(float *a, float *b, float *c, float *d) { // CHECK: br label %[[SIMD_LOOP7_COND:[^,]+]] // CHECK: [[SIMD_LOOP7_COND]] -// CHECK-NEXT: [[IV7:%.+]] = load i64, i64* [[OMP_IV7]]{{.*}}!llvm.mem.parallel_loop_access ![[SIMPLE_LOOP7_ID:[0-9]+]] -// CHECK-NEXT: [[UB_VAL:%.+]] = load i64, i64* [[UB]]{{.*}}!llvm.mem.parallel_loop_access ![[SIMPLE_LOOP7_ID]] +// CHECK-NEXT: [[IV7:%.+]] = load i64, i64* [[OMP_IV7]] +// CHECK-NEXT: [[UB_VAL:%.+]] = load i64, i64* [[UB]] // CHECK-NEXT: [[CMP7:%.+]] = icmp sle i64 [[IV7]], [[UB_VAL]] // CHECK-NEXT: br i1 [[CMP7]], label %[[SIMPLE_LOOP7_BODY:.+]], label %[[SIMPLE_LOOP7_END:[^,]+]] for (long long i = -10; i < 10; i += 3) { // CHECK: [[SIMPLE_LOOP7_BODY]] // Start of body: calculate i from IV: -// CHECK: [[IV7_0:%.+]] = load i64, i64* [[OMP_IV7]]{{.*}}!llvm.mem.parallel_loop_access ![[SIMPLE_LOOP7_ID]] +// CHECK: [[IV7_0:%.+]] = load i64, i64* [[OMP_IV7]] // CHECK-NEXT: [[LC_IT_1:%.+]] = mul nsw i64 [[IV7_0]], 3 // CHECK-NEXT: [[LC_IT_2:%.+]] = add nsw i64 -10, [[LC_IT_1]] -// CHECK-NEXT: store i64 [[LC_IT_2]], i64* [[LC:%[^,]+]],{{.+}}!llvm.mem.parallel_loop_access ![[SIMPLE_LOOP7_ID]] -// CHECK-NEXT: [[LC_VAL:%.+]] = load i64, i64* [[LC]]{{.+}}!llvm.mem.parallel_loop_access ![[SIMPLE_LOOP7_ID]] +// CHECK-NEXT: store i64 [[LC_IT_2]], i64* [[LC:%[^,]+]], +// CHECK-NEXT: [[LC_VAL:%.+]] = load i64, i64* [[LC]] // CHECK-NEXT: [[CONV:%.+]] = trunc i64 [[LC_VAL]] to i32 -// CHECK-NEXT: store i32 [[CONV]], i32* [[A_PRIV:%[^,]+]],{{.+}}!llvm.mem.parallel_loop_access ![[SIMPLE_LOOP7_ID]] +// CHECK-NEXT: store i32 [[CONV]], i32* [[A_PRIV:%[^,]+]], A = i; -// CHECK: [[IV7_2:%.+]] = load i64, i64* [[OMP_IV7]]{{.*}}!llvm.mem.parallel_loop_access ![[SIMPLE_LOOP7_ID]] +// CHECK: [[IV7_2:%.+]] = load i64, i64* [[OMP_IV7]] // CHECK-NEXT: [[ADD7_2:%.+]] = add nsw i64 [[IV7_2]], 1 -// CHECK-NEXT: store i64 [[ADD7_2]], i64* [[OMP_IV7]]{{.*}}!llvm.mem.parallel_loop_access ![[SIMPLE_LOOP7_ID]] +// CHECK-NEXT: store i64 [[ADD7_2]], i64* [[OMP_IV7]] } // CHECK: [[SIMPLE_LOOP7_END]] // CHECK: call void @__kmpc_for_static_fini(%ident_t* {{.+}}, i32 %{{.+}}) @@ -330,23 +330,23 @@ void simple(float *a, float *b, float *c, float *d) { // CHECK: br label %[[SIMD_LOOP8_COND:[^,]+]] // CHECK: [[SIMD_LOOP8_COND]] -// CHECK-NEXT: [[IV8:%.+]] = load i64, i64* [[OMP_IV8]]{{.*}}!llvm.mem.parallel_loop_access ![[SIMPLE_LOOP8_ID:[0-9]+]] -// CHECK-NEXT: [[UB_VAL:%.+]] = load i64, i64* [[UB]]{{.*}}!llvm.mem.parallel_loop_access ![[SIMPLE_LOOP8_ID]] +// CHECK-NEXT: [[IV8:%.+]] = load i64, i64* [[OMP_IV8]] +// CHECK-NEXT: [[UB_VAL:%.+]] = load i64, i64* [[UB]] // CHECK-NEXT: [[CMP8:%.+]] = icmp sle i64 [[IV8]], [[UB_VAL]] // CHECK-NEXT: br i1 [[CMP8]], label %[[SIMPLE_LOOP8_BODY:.+]], label %[[SIMPLE_LOOP8_END:[^,]+]] for (long long i = -10; i < 10; i += 3) { // CHECK: [[SIMPLE_LOOP8_BODY]] // Start of body: calculate i from IV: -// CHECK: [[IV8_0:%.+]] = load i64, i64* [[OMP_IV8]]{{.*}}!llvm.mem.parallel_loop_access ![[SIMPLE_LOOP8_ID]] +// CHECK: [[IV8_0:%.+]] = load i64, i64* [[OMP_IV8]] // CHECK-NEXT: [[LC_IT_1:%.+]] = mul nsw i64 [[IV8_0]], 3 // CHECK-NEXT: [[LC_IT_2:%.+]] = add nsw i64 -10, [[LC_IT_1]] -// CHECK-NEXT: store i64 [[LC_IT_2]], i64* [[LC:%[^,]+]],{{.+}}!llvm.mem.parallel_loop_access ![[SIMPLE_LOOP8_ID]] -// CHECK-NEXT: [[LC_VAL:%.+]] = load i64, i64* [[LC]]{{.+}}!llvm.mem.parallel_loop_access ![[SIMPLE_LOOP8_ID]] -// CHECK: store i32 %{{.+}}, i32* [[R_PRIV]],{{.+}}!llvm.mem.parallel_loop_access ![[SIMPLE_LOOP8_ID]] +// CHECK-NEXT: store i64 [[LC_IT_2]], i64* [[LC:%[^,]+]], +// CHECK-NEXT: [[LC_VAL:%.+]] = load i64, i64* [[LC]] +// CHECK: store i32 %{{.+}}, i32* [[R_PRIV]], R *= i; -// CHECK: [[IV8_2:%.+]] = load i64, i64* [[OMP_IV8]]{{.*}}!llvm.mem.parallel_loop_access ![[SIMPLE_LOOP8_ID]] +// CHECK: [[IV8_2:%.+]] = load i64, i64* [[OMP_IV8]] // CHECK-NEXT: [[ADD8_2:%.+]] = add nsw i64 [[IV8_2]], 1 -// CHECK-NEXT: store i64 [[ADD8_2]], i64* [[OMP_IV8]]{{.*}}!llvm.mem.parallel_loop_access ![[SIMPLE_LOOP8_ID]] +// CHECK-NEXT: store i64 [[ADD8_2]], i64* [[OMP_IV8]] } // CHECK: [[SIMPLE_LOOP8_END]] // CHECK: call void @__kmpc_for_static_fini(%ident_t* {{.+}}, i32 %{{.+}}) @@ -438,14 +438,14 @@ void iter_simple(IterDouble ia, IterDouble ib, IterDouble ic) { // CHECK: [[LB_VAL:%.+]] = load i32, i32* [[LB]], // CHECK: store i32 [[LB_VAL]], i32* [[IT_OMP_IV:%[^,]+]], -// CHECK: [[IV:%.+]] = load i32, i32* [[IT_OMP_IV]]{{.+}} !llvm.mem.parallel_loop_access ![[ITER_LOOP_ID:[0-9]+]] -// CHECK-NEXT: [[UB_VAL:%.+]] = load i32, i32* [[UB]]{{.*}}!llvm.mem.parallel_loop_access ![[ITER_LOOP_ID]] +// CHECK: [[IV:%.+]] = load i32, i32* [[IT_OMP_IV]] +// CHECK-NEXT: [[UB_VAL:%.+]] = load i32, i32* [[UB]] // CHECK-NEXT: [[CMP:%.+]] = icmp sle i32 [[IV]], [[UB_VAL]] // CHECK-NEXT: br i1 [[CMP]], label %[[IT_BODY:[^,]+]], label %[[IT_END:[^,]+]] for (IterDouble i = ia; i < ib; ++i) { // CHECK: [[IT_BODY]] // Start of body: calculate i from index: -// CHECK: [[IV1:%.+]] = load i32, i32* [[IT_OMP_IV]]{{.+}}!llvm.mem.parallel_loop_access ![[ITER_LOOP_ID]] +// CHECK: [[IV1:%.+]] = load i32, i32* [[IT_OMP_IV]] // Call of operator+ (i, IV). // CHECK: {{%.+}} = invoke {{.+}} @{{.*}}IterDouble{{.*}} // ... loop body ... @@ -453,12 +453,12 @@ void iter_simple(IterDouble ia, IterDouble ib, IterDouble ic) { // Float multiply and save result. // CHECK: [[MULR:%.+]] = fmul double {{%.+}}, 5.000000e-01 // CHECK-NEXT: invoke {{.+}} @{{.*}}IterDouble{{.*}} -// CHECK: store double [[MULR:%.+]], double* [[RESULT_ADDR:%.+]], !llvm.mem.parallel_loop_access ![[ITER_LOOP_ID]] +// CHECK: store double [[MULR:%.+]], double* [[RESULT_ADDR:%.+]] ++ic; // -// CHECK: [[IV2:%.+]] = load i32, i32* [[IT_OMP_IV]]{{.+}}!llvm.mem.parallel_loop_access ![[ITER_LOOP_ID]] +// CHECK: [[IV2:%.+]] = load i32, i32* [[IT_OMP_IV]] // CHECK-NEXT: [[ADD2:%.+]] = add nsw i32 [[IV2]], 1 -// CHECK-NEXT: store i32 [[ADD2]], i32* [[IT_OMP_IV]]{{.+}}!llvm.mem.parallel_loop_access ![[ITER_LOOP_ID]] +// CHECK-NEXT: store i32 [[ADD2]], i32* [[IT_OMP_IV]] // br label %{{.*}}, !llvm.loop ![[ITER_LOOP_ID]] } // CHECK: [[IT_END]] @@ -490,8 +490,8 @@ void collapsed(float *a, float *b, float *c, float *d) { // #pragma omp parallel for simd collapse(4) -// CHECK: [[IV:%.+]] = load i32, i32* [[OMP_IV]]{{.+}}!llvm.mem.parallel_loop_access ![[COLL1_LOOP_ID:[0-9]+]] -// CHECK: [[UB_VAL:%.+]] = load i32, i32* [[UB]]{{.*}}!llvm.mem.parallel_loop_access ![[COLL1_LOOP_ID]] +// CHECK: [[IV:%.+]] = load i32, i32* [[OMP_IV]] +// CHECK: [[UB_VAL:%.+]] = load i32, i32* [[UB]] // CHECK-NEXT: [[CMP:%.+]] = icmp ule i32 [[IV]], [[UB_VAL]] // CHECK-NEXT: br i1 [[CMP]], label %[[COLL1_BODY:[^,]+]], label %[[COLL1_END:[^,]+]] for (i = 1; i < 3; i++) // 2 iterations @@ -501,25 +501,25 @@ void collapsed(float *a, float *b, float *c, float *d) { { // CHECK: [[COLL1_BODY]] // Start of body: calculate i from index: -// CHECK: [[IV1:%.+]] = load i32, i32* [[OMP_IV]]{{.+}}!llvm.mem.parallel_loop_access ![[COLL1_LOOP_ID]] +// CHECK: [[IV1:%.+]] = load i32, i32* [[OMP_IV]] // Calculation of the loop counters values. // CHECK: [[CALC_I_1:%.+]] = udiv i32 [[IV1]], 60 // CHECK-NEXT: [[CALC_I_1_MUL1:%.+]] = mul i32 [[CALC_I_1]], 1 // CHECK-NEXT: [[CALC_I_2:%.+]] = add i32 1, [[CALC_I_1_MUL1]] // CHECK-NEXT: store i32 [[CALC_I_2]], i32* [[LC_I:.+]] -// CHECK: [[IV1_2:%.+]] = load i32, i32* [[OMP_IV]]{{.+}}!llvm.mem.parallel_loop_access ![[COLL1_LOOP_ID]] +// CHECK: [[IV1_2:%.+]] = load i32, i32* [[OMP_IV]] // CHECK-NEXT: [[CALC_J_1:%.+]] = udiv i32 [[IV1_2]], 20 // CHECK-NEXT: [[CALC_J_2:%.+]] = urem i32 [[CALC_J_1]], 3 // CHECK-NEXT: [[CALC_J_2_MUL1:%.+]] = mul i32 [[CALC_J_2]], 1 // CHECK-NEXT: [[CALC_J_3:%.+]] = add i32 2, [[CALC_J_2_MUL1]] // CHECK-NEXT: store i32 [[CALC_J_3]], i32* [[LC_J:.+]] -// CHECK: [[IV1_3:%.+]] = load i32, i32* [[OMP_IV]]{{.+}}!llvm.mem.parallel_loop_access ![[COLL1_LOOP_ID]] +// CHECK: [[IV1_3:%.+]] = load i32, i32* [[OMP_IV]] // CHECK-NEXT: [[CALC_K_1:%.+]] = udiv i32 [[IV1_3]], 5 // CHECK-NEXT: [[CALC_K_2:%.+]] = urem i32 [[CALC_K_1]], 4 // CHECK-NEXT: [[CALC_K_2_MUL1:%.+]] = mul i32 [[CALC_K_2]], 1 // CHECK-NEXT: [[CALC_K_3:%.+]] = add i32 3, [[CALC_K_2_MUL1]] // CHECK-NEXT: store i32 [[CALC_K_3]], i32* [[LC_K:.+]] -// CHECK: [[IV1_4:%.+]] = load i32, i32* [[OMP_IV]]{{.+}}!llvm.mem.parallel_loop_access ![[COLL1_LOOP_ID]] +// CHECK: [[IV1_4:%.+]] = load i32, i32* [[OMP_IV]] // CHECK-NEXT: [[CALC_L_1:%.+]] = urem i32 [[IV1_4]], 5 // CHECK-NEXT: [[CALC_L_1_MUL1:%.+]] = mul i32 [[CALC_L_1]], 1 // CHECK-NEXT: [[CALC_L_2:%.+]] = add i32 4, [[CALC_L_1_MUL1]] @@ -527,12 +527,12 @@ void collapsed(float *a, float *b, float *c, float *d) { // CHECK-NEXT: store i16 [[CALC_L_3]], i16* [[LC_L:.+]] // ... loop body ... // End of body: store into a[i]: -// CHECK: store float [[RESULT:%.+]], float* [[RESULT_ADDR:%.+]]{{.+}}!llvm.mem.parallel_loop_access ![[COLL1_LOOP_ID]] +// CHECK: store float [[RESULT:%.+]], float* [[RESULT_ADDR:%.+]] float res = b[j] * c[k]; a[i] = res * d[l]; -// CHECK: [[IV2:%.+]] = load i32, i32* [[OMP_IV]]{{.*}}!llvm.mem.parallel_loop_access ![[COLL1_LOOP_ID]] +// CHECK: [[IV2:%.+]] = load i32, i32* [[OMP_IV]] // CHECK-NEXT: [[ADD2:%.+]] = add i32 [[IV2]], 1 -// CHECK-NEXT: store i32 [[ADD2]], i32* [[OMP_IV]]{{.*}}!llvm.mem.parallel_loop_access ![[COLL1_LOOP_ID]] +// CHECK-NEXT: store i32 [[ADD2]], i32* [[OMP_IV]] // br label %{{[^,]+}}, !llvm.loop ![[COLL1_LOOP_ID]] // CHECK: [[COLL1_END]] } @@ -577,8 +577,8 @@ void widened(float *a, float *b, float *c, float *d) { // #pragma omp parallel for simd collapse(2) private(globalfloat, localint) -// CHECK: [[IV:%.+]] = load i64, i64* [[OMP_IV]]{{.+}}!llvm.mem.parallel_loop_access ![[WIDE1_LOOP_ID:[0-9]+]] -// CHECK: [[UB_VAL:%.+]] = load i64, i64* [[UB]]{{.*}}!llvm.mem.parallel_loop_access ![[WIDE1_LOOP_ID]] +// CHECK: [[IV:%.+]] = load i64, i64* [[OMP_IV]] +// CHECK: [[UB_VAL:%.+]] = load i64, i64* [[UB]] // CHECK-NEXT: [[CMP:%.+]] = icmp sle i64 [[IV]], [[UB_VAL]] // CHECK-NEXT: br i1 [[CMP]], label %[[WIDE1_BODY:[^,]+]], label %[[WIDE1_END:[^,]+]] for (i = 1; i < 3; i++) // 2 iterations @@ -586,10 +586,10 @@ void widened(float *a, float *b, float *c, float *d) { { // CHECK: [[WIDE1_BODY]] // Start of body: calculate i from index: -// CHECK: [[IV1:%.+]] = load i64, i64* [[OMP_IV]]{{.+}}!llvm.mem.parallel_loop_access ![[WIDE1_LOOP_ID]] +// CHECK: [[IV1:%.+]] = load i64, i64* [[OMP_IV]] // Calculation of the loop counters values... // CHECK: store i32 {{[^,]+}}, i32* [[LC_I:.+]] -// CHECK: [[IV1_2:%.+]] = load i64, i64* [[OMP_IV]]{{.+}}!llvm.mem.parallel_loop_access ![[WIDE1_LOOP_ID]] +// CHECK: [[IV1_2:%.+]] = load i64, i64* [[OMP_IV]] // CHECK: store i16 {{[^,]+}}, i16* [[LC_J:.+]] // ... loop body ... // @@ -598,14 +598,14 @@ void widened(float *a, float *b, float *c, float *d) { globalfloat = (float)j/i; float res = b[j] * c[j]; // Store into a[i]: -// CHECK: store float [[RESULT:%.+]], float* [[RESULT_ADDR:%.+]]{{.+}}!llvm.mem.parallel_loop_access ![[WIDE1_LOOP_ID]] +// CHECK: store float [[RESULT:%.+]], float* [[RESULT_ADDR:%.+]] a[i] = res * d[i]; // Then there's a store into private var localint: -// CHECK: store i32 {{.+}}, i32* [[LOCALINT:%[^,]+]]{{.+}}!llvm.mem.parallel_loop_access ![[WIDE1_LOOP_ID]] +// CHECK: store i32 {{.+}}, i32* [[LOCALINT:%[^,]+]] localint = (int)j; -// CHECK: [[IV2:%.+]] = load i64, i64* [[OMP_IV]]{{.*}}!llvm.mem.parallel_loop_access ![[WIDE1_LOOP_ID]] +// CHECK: [[IV2:%.+]] = load i64, i64* [[OMP_IV]] // CHECK-NEXT: [[ADD2:%.+]] = add nsw i64 [[IV2]], 1 -// CHECK-NEXT: store i64 [[ADD2]], i64* [[OMP_IV]]{{.*}}!llvm.mem.parallel_loop_access ![[WIDE1_LOOP_ID]] +// CHECK-NEXT: store i64 [[ADD2]], i64* [[OMP_IV]] // // br label %{{[^,]+}}, !llvm.loop ![[WIDE1_LOOP_ID]] // CHECK: [[WIDE1_END]] @@ -636,27 +636,27 @@ void widened(float *a, float *b, float *c, float *d) { // CHECK: store i64 [[LB_VAL]], i64* [[T1_OMP_IV:%[^,]+]], // ... -// CHECK: [[IV:%.+]] = load i64, i64* [[T1_OMP_IV]]{{.*}}!llvm.mem.parallel_loop_access ![[T1_ID:[0-9]+]] -// CHECK-NEXT: [[UB_VAL:%.+]] = load i64, i64* [[UB]]{{.*}}!llvm.mem.parallel_loop_access ![[T1_ID]] +// CHECK: [[IV:%.+]] = load i64, i64* [[T1_OMP_IV]] +// CHECK-NEXT: [[UB_VAL:%.+]] = load i64, i64* [[UB]] // CHECK-NEXT: [[CMP1:%.+]] = icmp sle i64 [[IV]], [[UB_VAL]] // CHECK-NEXT: br i1 [[CMP1]], label %[[T1_BODY:.+]], label %[[T1_END:[^,]+]] // CHECK: [[T1_BODY]] // Loop counters i and j updates: -// CHECK: [[IV1:%.+]] = load i64, i64* [[T1_OMP_IV]]{{.*}}!llvm.mem.parallel_loop_access ![[T1_ID]] +// CHECK: [[IV1:%.+]] = load i64, i64* [[T1_OMP_IV]] // CHECK-NEXT: [[I_1:%.+]] = sdiv i64 [[IV1]], 4 // CHECK-NEXT: [[I_1_MUL1:%.+]] = mul nsw i64 [[I_1]], 1 // CHECK-NEXT: [[I_1_ADD0:%.+]] = add nsw i64 0, [[I_1_MUL1]] // CHECK-NEXT: [[I_2:%.+]] = trunc i64 [[I_1_ADD0]] to i32 -// CHECK-NEXT: store i32 [[I_2]], i32* {{%.+}}{{.*}}!llvm.mem.parallel_loop_access ![[T1_ID]] -// CHECK: [[IV2:%.+]] = load i64, i64* [[T1_OMP_IV]]{{.*}}!llvm.mem.parallel_loop_access ![[T1_ID]] +// CHECK-NEXT: store i32 [[I_2]], i32* +// CHECK: [[IV2:%.+]] = load i64, i64* [[T1_OMP_IV]] // CHECK-NEXT: [[J_1:%.+]] = srem i64 [[IV2]], 4 // CHECK-NEXT: [[J_2:%.+]] = mul nsw i64 [[J_1]], 2 // CHECK-NEXT: [[J_2_ADD0:%.+]] = add nsw i64 0, [[J_2]] -// CHECK-NEXT: store i64 [[J_2_ADD0]], i64* {{%.+}}{{.*}}!llvm.mem.parallel_loop_access ![[T1_ID]] +// CHECK-NEXT: store i64 [[J_2_ADD0]], i64* // simd.for.inc: -// CHECK: [[IV3:%.+]] = load i64, i64* [[T1_OMP_IV]]{{.*}}!llvm.mem.parallel_loop_access ![[T1_ID]] +// CHECK: [[IV3:%.+]] = load i64, i64* [[T1_OMP_IV]] // CHECK-NEXT: [[INC:%.+]] = add nsw i64 [[IV3]], 1 -// CHECK-NEXT: store i64 [[INC]], i64* [[T1_OMP_IV]]{{.*}}!llvm.mem.parallel_loop_access ![[T1_ID]] +// CHECK-NEXT: store i64 [[INC]], i64* // CHECK-NEXT: br label {{%.+}} // CHECK: [[T1_END]] // CHECK: call void @__kmpc_for_static_fini(%ident_t* {{.+}}, i32 %{{.+}}) diff --git a/test/OpenMP/schedule_codegen.cpp b/test/OpenMP/schedule_codegen.cpp new file mode 100644 index 0000000000..bd5ec86b29 --- /dev/null +++ b/test/OpenMP/schedule_codegen.cpp @@ -0,0 +1,194 @@ +// RUN: %clang_cc1 -fopenmp -x c++ -triple x86_64-unknown-unknown -emit-llvm -fexceptions -fcxx-exceptions -o - %s | FileCheck %s + +int main() { +// CHECK: @__kmpc_for_static_init +// CHECK-NOT: !llvm.mem.parallel_loop_access +// CHECK: @__kmpc_for_static_fini +#pragma omp for + for(int i = 0; i < 10; ++i); +// CHECK: @__kmpc_for_static_init +// CHECK-NOT: !llvm.mem.parallel_loop_access +// CHECK: @__kmpc_for_static_fini +#pragma omp for simd + for(int i = 0; i < 10; ++i); +// CHECK: @__kmpc_for_static_init +// CHECK-NOT: !llvm.mem.parallel_loop_access +// CHECK: @__kmpc_for_static_fini +#pragma omp for schedule(static) + for(int i = 0; i < 10; ++i); +// CHECK: @__kmpc_for_static_init +// CHECK-NOT: !llvm.mem.parallel_loop_access +// CHECK: @__kmpc_for_static_fini +#pragma omp for simd schedule(static) + for(int i = 0; i < 10; ++i); +// CHECK: @__kmpc_for_static_init +// CHECK-NOT: !llvm.mem.parallel_loop_access +// CHECK: @__kmpc_for_static_fini +#pragma omp for schedule(static, 2) + for(int i = 0; i < 10; ++i); +// CHECK: @__kmpc_for_static_init +// CHECK-NOT: !llvm.mem.parallel_loop_access +// CHECK: @__kmpc_for_static_fini +#pragma omp for simd schedule(static, 2) + for(int i = 0; i < 10; ++i); +// CHECK: @__kmpc_dispatch_init +// CHECK: !llvm.mem.parallel_loop_access +#pragma omp for schedule(auto) + for(int i = 0; i < 10; ++i); +// CHECK: @__kmpc_dispatch_init +// CHECK: !llvm.mem.parallel_loop_access +#pragma omp for simd schedule(auto) + for(int i = 0; i < 10; ++i); +// CHECK: @__kmpc_dispatch_init +// CHECK: !llvm.mem.parallel_loop_access +#pragma omp for schedule(runtime) + for(int i = 0; i < 10; ++i); +// CHECK: @__kmpc_dispatch_init +// CHECK: !llvm.mem.parallel_loop_access +#pragma omp for simd schedule(runtime) + for(int i = 0; i < 10; ++i); +// CHECK: @__kmpc_dispatch_init +// CHECK: !llvm.mem.parallel_loop_access +#pragma omp for schedule(guided) + for(int i = 0; i < 10; ++i); +// CHECK: @__kmpc_dispatch_init +// CHECK: !llvm.mem.parallel_loop_access +#pragma omp for simd schedule(guided) + for(int i = 0; i < 10; ++i); +// CHECK: @__kmpc_dispatch_init +// CHECK: !llvm.mem.parallel_loop_access +#pragma omp for schedule(dynamic) + for(int i = 0; i < 10; ++i); +// CHECK: @__kmpc_dispatch_init +// CHECK: !llvm.mem.parallel_loop_access +#pragma omp for simd schedule(dynamic) + for(int i = 0; i < 10; ++i); +// CHECK: @__kmpc_for_static_init +// CHECK-NOT: !llvm.mem.parallel_loop_access +// CHECK: @__kmpc_for_static_fini +#pragma omp for schedule(monotonic: static) + for(int i = 0; i < 10; ++i); +// CHECK: @__kmpc_for_static_init +// CHECK-NOT: !llvm.mem.parallel_loop_access +// CHECK: @__kmpc_for_static_fini +#pragma omp for simd schedule(monotonic: static) + for(int i = 0; i < 10; ++i); +// CHECK: @__kmpc_for_static_init +// CHECK-NOT: !llvm.mem.parallel_loop_access +// CHECK: @__kmpc_for_static_fini +#pragma omp for schedule(monotonic: static, 2) + for(int i = 0; i < 10; ++i); +// CHECK: @__kmpc_for_static_init +// CHECK-NOT: !llvm.mem.parallel_loop_access +// CHECK: @__kmpc_for_static_fini +#pragma omp for simd schedule(monotonic: static, 2) + for(int i = 0; i < 10; ++i); +// CHECK: @__kmpc_dispatch_init +// CHECK-NOT: !llvm.mem.parallel_loop_access +#pragma omp for schedule(monotonic: auto) + for(int i = 0; i < 10; ++i); +// CHECK: @__kmpc_dispatch_init +// CHECK-NOT: !llvm.mem.parallel_loop_access +#pragma omp for simd schedule(monotonic: auto) + for(int i = 0; i < 10; ++i); +// CHECK: @__kmpc_dispatch_init +// CHECK-NOT: !llvm.mem.parallel_loop_access +#pragma omp for schedule(monotonic: runtime) + for(int i = 0; i < 10; ++i); +// CHECK: @__kmpc_dispatch_init +// CHECK-NOT: !llvm.mem.parallel_loop_access +#pragma omp for simd schedule(monotonic: runtime) + for(int i = 0; i < 10; ++i); +// CHECK: @__kmpc_dispatch_init +// CHECK-NOT: !llvm.mem.parallel_loop_access +#pragma omp for schedule(monotonic: guided) + for(int i = 0; i < 10; ++i); +// CHECK: @__kmpc_dispatch_init +// CHECK-NOT: !llvm.mem.parallel_loop_access +#pragma omp for simd schedule(monotonic: guided) + for(int i = 0; i < 10; ++i); +// CHECK: @__kmpc_dispatch_init +// CHECK-NOT: !llvm.mem.parallel_loop_access +#pragma omp for schedule(monotonic: dynamic) + for(int i = 0; i < 10; ++i); +// CHECK: @__kmpc_dispatch_init +// CHECK-NOT: !llvm.mem.parallel_loop_access +#pragma omp for simd schedule(monotonic: dynamic) + for(int i = 0; i < 10; ++i); +// CHECK: @__kmpc_dispatch_init +// CHECK: !llvm.mem.parallel_loop_access +#pragma omp for schedule(nonmonotonic: guided) + for(int i = 0; i < 10; ++i); +// CHECK: @__kmpc_dispatch_init +// CHECK: !llvm.mem.parallel_loop_access +#pragma omp for simd schedule(nonmonotonic: guided) + for(int i = 0; i < 10; ++i); +// CHECK: @__kmpc_dispatch_init +// CHECK: !llvm.mem.parallel_loop_access +#pragma omp for schedule(nonmonotonic: dynamic) + for(int i = 0; i < 10; ++i); +// CHECK: @__kmpc_dispatch_init +// CHECK: !llvm.mem.parallel_loop_access +#pragma omp for simd schedule(nonmonotonic: dynamic) + for(int i = 0; i < 10; ++i); +// CHECK: @__kmpc_dispatch_init +// CHECK-NOT: !llvm.mem.parallel_loop_access +// CHECK: @__kmpc_dispatch_next +#pragma omp for schedule(static) ordered + for(int i = 0; i < 10; ++i); +// CHECK: @__kmpc_dispatch_init +// CHECK-NOT: !llvm.mem.parallel_loop_access +// CHECK: @__kmpc_dispatch_next +#pragma omp for simd schedule(static) ordered + for(int i = 0; i < 10; ++i); +// CHECK: @__kmpc_dispatch_init +// CHECK-NOT: !llvm.mem.parallel_loop_access +// CHECK: @__kmpc_dispatch_next +#pragma omp for schedule(static, 2) ordered(1) + for(int i = 0; i < 10; ++i); +// CHECK: @__kmpc_dispatch_init +// CHECK-NOT: !llvm.mem.parallel_loop_access +// CHECK: @__kmpc_dispatch_next +#pragma omp for simd schedule(static, 2) ordered + for(int i = 0; i < 10; ++i); +// CHECK: @__kmpc_dispatch_init +// CHECK-NOT: !llvm.mem.parallel_loop_access +// CHECK: @__kmpc_dispatch_next +#pragma omp for schedule(auto) ordered(1) + for(int i = 0; i < 10; ++i); +// CHECK: @__kmpc_dispatch_init +// CHECK-NOT: !llvm.mem.parallel_loop_access +#pragma omp for simd schedule(auto) ordered + for(int i = 0; i < 10; ++i); +// CHECK: @__kmpc_dispatch_init +// CHECK-NOT: !llvm.mem.parallel_loop_access +// CHECK: @__kmpc_dispatch_next +#pragma omp for schedule(runtime) ordered + for(int i = 0; i < 10; ++i); +// CHECK: @__kmpc_dispatch_init +// CHECK-NOT: !llvm.mem.parallel_loop_access +// CHECK: @__kmpc_dispatch_next +#pragma omp for simd schedule(runtime) ordered + for(int i = 0; i < 10; ++i); +// CHECK: @__kmpc_dispatch_init +// CHECK-NOT: !llvm.mem.parallel_loop_access +// CHECK: @__kmpc_dispatch_next +#pragma omp for schedule(guided) ordered(1) + for(int i = 0; i < 10; ++i); +// CHECK: @__kmpc_dispatch_init +// CHECK-NOT: !llvm.mem.parallel_loop_access +// CHECK: @__kmpc_dispatch_next +#pragma omp for simd schedule(guided) ordered + for(int i = 0; i < 10; ++i); +// CHECK: @__kmpc_dispatch_init +// CHECK-NOT: !llvm.mem.parallel_loop_access +// CHECK: @__kmpc_dispatch_next +#pragma omp for schedule(dynamic) ordered(1) + for(int i = 0; i < 10; ++i); +// CHECK: @__kmpc_dispatch_init +// CHECK-NOT: !llvm.mem.parallel_loop_access +// CHECK: @__kmpc_dispatch_next +#pragma omp for simd schedule(dynamic) + for(int i = 0; i < 10; ++i); + return 0; +}