From: Alexey Bataev Date: Wed, 9 Mar 2016 09:49:09 +0000 (+0000) Subject: [OPENMP 4.5] Codegen for data members in 'linear' clause X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=b8ec8af9777567f250fc4948b0d9ffc352a82ad3;p=clang [OPENMP 4.5] Codegen for data members in 'linear' clause OpenMP 4.5 allows privatization of non-static data members in OpenMP constructs. Patch adds proper codegen support for data members in 'linear' clause git-svn-id: https://llvm.org/svn/llvm-project/cfe/trunk@263003 91177308-0d34-0410-b5e6-96231b3b80d8 --- diff --git a/lib/CodeGen/CGStmtOpenMP.cpp b/lib/CodeGen/CGStmtOpenMP.cpp index 211155edd2..808af527a3 100644 --- a/lib/CodeGen/CGStmtOpenMP.cpp +++ b/lib/CodeGen/CGStmtOpenMP.cpp @@ -1114,17 +1114,19 @@ void CodeGenFunction::EmitOMPLinearClauseInit(const OMPLoopDirective &D) { for (const auto *C : D.getClausesOfKind()) { for (auto Init : C->inits()) { auto *VD = cast(cast(Init)->getDecl()); - auto *OrigVD = cast( - cast(VD->getInit()->IgnoreImpCasts())->getDecl()); - DeclRefExpr DRE(const_cast(OrigVD), - CapturedStmtInfo->lookup(OrigVD) != nullptr, - VD->getInit()->getType(), VK_LValue, - VD->getInit()->getExprLoc()); - AutoVarEmission Emission = EmitAutoVarAlloca(*VD); - EmitExprAsInit(&DRE, VD, - MakeAddrLValue(Emission.getAllocatedAddress(), VD->getType()), - /*capturedByInit=*/false); - EmitAutoVarCleanups(Emission); + if (auto *Ref = dyn_cast(VD->getInit()->IgnoreImpCasts())) { + AutoVarEmission Emission = EmitAutoVarAlloca(*VD); + auto *OrigVD = cast(Ref->getDecl()); + DeclRefExpr DRE(const_cast(OrigVD), + CapturedStmtInfo->lookup(OrigVD) != nullptr, + VD->getInit()->getType(), VK_LValue, + VD->getInit()->getExprLoc()); + EmitExprAsInit(&DRE, VD, MakeAddrLValue(Emission.getAllocatedAddress(), + VD->getType()), + /*capturedByInit=*/false); + EmitAutoVarCleanups(Emission); + } else + EmitVarDecl(*VD); } // Emit the linear steps for the linear clauses. // If a step is not constant, it is pre-calculated before the loop. @@ -1137,14 +1139,26 @@ void CodeGenFunction::EmitOMPLinearClauseInit(const OMPLoopDirective &D) { } } -static void emitLinearClauseFinal(CodeGenFunction &CGF, - const OMPLoopDirective &D) { +static void emitLinearClauseFinal( + CodeGenFunction &CGF, const OMPLoopDirective &D, + const llvm::function_ref &CondGen) { if (!CGF.HaveInsertPoint()) return; + llvm::BasicBlock *DoneBB = nullptr; // Emit the final values of the linear variables. for (const auto *C : D.getClausesOfKind()) { auto IC = C->varlist_begin(); for (auto F : C->finals()) { + if (!DoneBB) { + if (auto *Cond = CondGen(CGF)) { + // If the first post-update expression is found, emit conditional + // block if it was requested. + auto *ThenBB = CGF.createBasicBlock(".omp.linear.pu"); + DoneBB = CGF.createBasicBlock(".omp.linear.pu.done"); + CGF.Builder.CreateCondBr(Cond, ThenBB, DoneBB); + CGF.EmitBlock(ThenBB); + } + } auto *OrigVD = cast(cast(*IC)->getDecl()); DeclRefExpr DRE(const_cast(OrigVD), CGF.CapturedStmtInfo->lookup(OrigVD) != nullptr, @@ -1158,8 +1172,10 @@ static void emitLinearClauseFinal(CodeGenFunction &CGF, ++IC; } if (auto *PostUpdate = C->getPostUpdateExpr()) - EmitIgnoredExpr(PostUpdate); + CGF.EmitIgnoredExpr(PostUpdate); } + if (DoneBB) + CGF.EmitBlock(DoneBB, /*IsFinished=*/true); } static void emitAlignedClause(CodeGenFunction &CGF, @@ -1296,13 +1312,26 @@ void CodeGenFunction::EmitOMPSimdInit(const OMPLoopDirective &D, emitSimdlenSafelenClause(*this, D, IsMonotonic); } -void CodeGenFunction::EmitOMPSimdFinal(const OMPLoopDirective &D) { +void CodeGenFunction::EmitOMPSimdFinal( + const OMPLoopDirective &D, + const llvm::function_ref &CondGen) { if (!HaveInsertPoint()) return; + llvm::BasicBlock *DoneBB = nullptr; auto IC = D.counters().begin(); for (auto F : D.finals()) { auto *OrigVD = cast(cast((*IC))->getDecl()); if (LocalDeclMap.count(OrigVD) || CapturedStmtInfo->lookup(OrigVD)) { + if (!DoneBB) { + if (auto *Cond = CondGen(*this)) { + // If the first post-update expression is found, emit conditional + // block if it was requested. + auto *ThenBB = createBasicBlock(".omp.final.then"); + DoneBB = createBasicBlock(".omp.final.done"); + Builder.CreateCondBr(Cond, ThenBB, DoneBB); + EmitBlock(ThenBB); + } + } DeclRefExpr DRE(const_cast(OrigVD), CapturedStmtInfo->lookup(OrigVD) != nullptr, (*IC)->getType(), VK_LValue, (*IC)->getExprLoc()); @@ -1315,7 +1344,8 @@ void CodeGenFunction::EmitOMPSimdFinal(const OMPLoopDirective &D) { } ++IC; } - emitLinearClauseFinal(*this, D); + if (DoneBB) + EmitBlock(DoneBB, /*IsFinished=*/true); } void CodeGenFunction::EmitOMPSimdDirective(const OMPSimdDirective &S) { @@ -1387,7 +1417,10 @@ void CodeGenFunction::EmitOMPSimdDirective(const OMPSimdDirective &S) { emitPostUpdateForReductionClause( CGF, S, [](CodeGenFunction &) -> llvm::Value * { return nullptr; }); } - CGF.EmitOMPSimdFinal(S); + CGF.EmitOMPSimdFinal( + S, [](CodeGenFunction &) -> llvm::Value * { return nullptr; }); + emitLinearClauseFinal( + CGF, S, [](CodeGenFunction &) -> llvm::Value * { return nullptr; }); // Emit: if (PreCond) - end. if (ContBlock) { CGF.EmitBranch(ContBlock); @@ -1651,18 +1684,18 @@ bool CodeGenFunction::EmitOMPWorksharingLoop(const OMPLoopDirective &S) { emitAlignedClause(*this, S); EmitOMPLinearClauseInit(S); + // Emit helper vars inits. + LValue LB = + EmitOMPHelperVar(*this, cast(S.getLowerBoundVariable())); + LValue UB = + EmitOMPHelperVar(*this, cast(S.getUpperBoundVariable())); + LValue ST = + EmitOMPHelperVar(*this, cast(S.getStrideVariable())); + LValue IL = + EmitOMPHelperVar(*this, cast(S.getIsLastIterVariable())); + // Emit 'then' code. { - // Emit helper vars inits. - LValue LB = - EmitOMPHelperVar(*this, cast(S.getLowerBoundVariable())); - LValue UB = - EmitOMPHelperVar(*this, cast(S.getUpperBoundVariable())); - LValue ST = - EmitOMPHelperVar(*this, cast(S.getStrideVariable())); - LValue IL = - EmitOMPHelperVar(*this, cast(S.getIsLastIterVariable())); - OMPPrivateScope LoopScope(*this); if (EmitOMPFirstprivateClause(S, LoopScope)) { // Emit implicit barrier to synchronize threads and avoid data races on @@ -1759,8 +1792,15 @@ bool CodeGenFunction::EmitOMPWorksharingLoop(const OMPLoopDirective &S) { S, Builder.CreateIsNotNull(EmitLoadOfScalar(IL, S.getLocStart()))); } if (isOpenMPSimdDirective(S.getDirectiveKind())) { - EmitOMPSimdFinal(S); + EmitOMPSimdFinal(S, [&](CodeGenFunction &CGF) -> llvm::Value * { + return CGF.Builder.CreateIsNotNull( + CGF.EmitLoadOfScalar(IL, S.getLocStart())); + }); } + emitLinearClauseFinal(*this, S, [&](CodeGenFunction &CGF) -> llvm::Value * { + return CGF.Builder.CreateIsNotNull( + CGF.EmitLoadOfScalar(IL, S.getLocStart())); + }); // We're now done with the loop, so jump to the continuation block. if (ContBlock) { EmitBranch(ContBlock); diff --git a/lib/CodeGen/CodeGenFunction.h b/lib/CodeGen/CodeGenFunction.h index a84a0f0e73..13a3acbaed 100644 --- a/lib/CodeGen/CodeGenFunction.h +++ b/lib/CodeGen/CodeGenFunction.h @@ -2389,7 +2389,9 @@ private: /// Helpers for the OpenMP loop directives. void EmitOMPLoopBody(const OMPLoopDirective &D, JumpDest LoopExit); void EmitOMPSimdInit(const OMPLoopDirective &D, bool IsMonotonic = false); - void EmitOMPSimdFinal(const OMPLoopDirective &D); + void EmitOMPSimdFinal( + const OMPLoopDirective &D, + const llvm::function_ref &CondGen); /// \brief Emit code for the worksharing loop-based directive. /// \return true, if this construct has any lastprivate clause, false - /// otherwise. diff --git a/test/OpenMP/for_linear_codegen.cpp b/test/OpenMP/for_linear_codegen.cpp index db9788361d..0ad45f5b70 100644 --- a/test/OpenMP/for_linear_codegen.cpp +++ b/test/OpenMP/for_linear_codegen.cpp @@ -23,6 +23,74 @@ volatile int &g1 = g; float f; char cnt; +struct SS { + int a; + int b : 4; + int &c; + SS(int &d) : a(0), b(0), c(d) { +#pragma omp parallel +#pragma omp for linear(a, b, c) + for (int i = 0; i < 2; ++i) +#ifdef LAMBDA + [&]() { + ++this->a, --b, (this)->c /= 1; +#pragma omp parallel +#pragma omp for linear(a, b) linear(ref(c)) + for (int i = 0; i < 2; ++i) + ++(this)->a, --b, this->c /= 1; + }(); +#elif defined(BLOCKS) + ^{ + ++a; + --this->b; + (this)->c /= 1; +#pragma omp parallel +#pragma omp for linear(a, b) linear(uval(c)) + for (int i = 0; i < 2; ++i) + ++(this)->a, --b, this->c /= 1; + }(); +#else + ++this->a, --b, c /= 1; +#endif + } +}; + +template +struct SST { + T a; + SST() : a(T()) { +#pragma omp parallel +#pragma omp for linear(a) + for (int i = 0; i < 2; ++i) +#ifdef LAMBDA + [&]() { + [&]() { + ++this->a; +#pragma omp parallel +#pragma omp for linear(a) + for (int i = 0; i < 2; ++i) + ++(this)->a; + }(); + }(); +#elif defined(BLOCKS) + ^{ + ^{ + ++a; +#pragma omp parallel +#pragma omp for linear(a) + for (int i = 0; i < 2; ++i) + ++(this)->a; + }(); + }(); +#else + ++(this)->a; +#endif + } +}; + +// CHECK: [[SS_TY:%.+]] = type { i{{[0-9]+}}, i8 +// LAMBDA: [[SS_TY:%.+]] = type { i{{[0-9]+}}, i8 +// BLOCKS: [[SS_TY:%.+]] = type { i{{[0-9]+}}, i8 // CHECK: [[S_FLOAT_TY:%.+]] = type { float } // CHECK: [[S_INT_TY:%.+]] = type { i32 } // CHECK-DAG: [[IMPLICIT_BARRIER_LOC:@.+]] = private unnamed_addr constant %{{.+}} { i32 0, i32 66, i32 0, i32 0, i8* @@ -31,6 +99,7 @@ char cnt; template T tmain() { S test; + SST sst; T *pvar = &test.f; T &lvar = test.f; #pragma omp parallel @@ -42,16 +111,75 @@ T tmain() { } int main() { + static int sivar; + SS ss(sivar); #ifdef LAMBDA // LAMBDA: [[G:@.+]] = global i{{[0-9]+}} 1212, // LAMBDA-LABEL: @main - // LAMBDA: call void [[OUTER_LAMBDA:@.+]]( + // LAMBDA: alloca [[SS_TY]], + // LAMBDA: alloca [[CAP_TY:%.+]], + // LAMBDA: call void [[OUTER_LAMBDA:@.+]]([[CAP_TY]]* [&]() { // LAMBDA: define{{.*}} internal{{.*}} void [[OUTER_LAMBDA]]( // LAMBDA: call void {{.+}} @__kmpc_fork_call({{.+}}, i32 0, {{.+}}* [[OMP_REGION:@.+]] to {{.+}}) #pragma omp parallel #pragma omp for linear(g, g1:5) for (int i = 0; i < 2; ++i) { + // LAMBDA: define {{.+}} @{{.+}}([[SS_TY]]* + // LAMBDA: getelementptr inbounds [[SS_TY]], [[SS_TY]]* %{{.+}}, i32 0, i32 0 + // LAMBDA: store i{{[0-9]+}} 0, i{{[0-9]+}}* % + // LAMBDA: getelementptr inbounds [[SS_TY]], [[SS_TY]]* %{{.+}}, i32 0, i32 1 + // LAMBDA: store i8 + // LAMBDA: getelementptr inbounds [[SS_TY]], [[SS_TY]]* %{{.+}}, i32 0, i32 2 + // LAMBDA: call void (%{{.+}}*, i{{[0-9]+}}, void (i{{[0-9]+}}*, i{{[0-9]+}}*, ...)*, ...) @__kmpc_fork_call(%{{.+}}* @{{.+}}, i{{[0-9]+}} 1, void (i{{[0-9]+}}*, i{{[0-9]+}}*, ...)* bitcast (void (i{{[0-9]+}}*, i{{[0-9]+}}*, [[SS_TY]]*)* [[SS_MICROTASK:@.+]] to void + // LAMBDA: ret + + // LAMBDA: define internal void [[SS_MICROTASK]](i{{[0-9]+}}* noalias [[GTID_ADDR:%.+]], i{{[0-9]+}}* noalias %{{.+}}, [[SS_TY]]* %{{.+}}) + // LAMBDA: getelementptr {{.*}}[[SS_TY]], [[SS_TY]]* %{{.*}}, i32 0, i32 0 + // LAMBDA-NOT: getelementptr {{.*}}[[SS_TY]], [[SS_TY]]* %{{.*}}, i32 0, i32 1 + // LAMBDA: getelementptr {{.*}}[[SS_TY]], [[SS_TY]]* %{{.*}}, i32 0, i32 2 + // LAMBDA: call void @__kmpc_for_static_init_4( + // LAMBDA-NOT: getelementptr {{.*}}[[SS_TY]], [[SS_TY]]* + // LAMBDA: call{{.*}} void + // LAMBDA: call void @__kmpc_for_static_fini( + // LAMBDA: br i1 + // LAMBDA: [[B_REF:%.+]] = getelementptr {{.*}}[[SS_TY]], [[SS_TY]]* %{{.*}}, i32 0, i32 1 + // LAMBDA: store i8 %{{.+}}, i8* [[B_REF]], + // LAMBDA: br label + // LAMBDA: ret void + + // LAMBDA: define internal void @{{.+}}(i{{[0-9]+}}* noalias [[GTID_ADDR:%.+]], i{{[0-9]+}}* noalias %{{.+}}, [[SS_TY]]* %{{.+}}, i32* {{.+}}, i32* {{.+}}, i32* {{.+}}) + // LAMBDA: alloca i{{[0-9]+}}, + // LAMBDA: alloca i{{[0-9]+}}, + // LAMBDA: alloca i{{[0-9]+}}, + // LAMBDA: alloca i{{[0-9]+}}, + // LAMBDA: alloca i{{[0-9]+}}, + // LAMBDA: alloca i{{[0-9]+}}, + // LAMBDA: alloca i{{[0-9]+}}, + // LAMBDA: alloca i{{[0-9]+}}, + // LAMBDA: alloca i{{[0-9]+}}, + // LAMBDA: [[A_PRIV:%.+]] = alloca i{{[0-9]+}}, + // LAMBDA: [[B_PRIV:%.+]] = alloca i{{[0-9]+}}, + // LAMBDA: [[C_PRIV:%.+]] = alloca i{{[0-9]+}}, + // LAMBDA: store i{{[0-9]+}}* [[A_PRIV]], i{{[0-9]+}}** [[REFA:%.+]], + // LAMBDA: store i{{[0-9]+}}* [[C_PRIV]], i{{[0-9]+}}** [[REFC:%.+]], + // LAMBDA: call void @__kmpc_for_static_init_4( + // LAMBDA: [[A_PRIV:%.+]] = load i{{[0-9]+}}*, i{{[0-9]+}}** [[REFA]], + // LAMBDA-NEXT: [[A_VAL:%.+]] = load i{{[0-9]+}}, i{{[0-9]+}}* [[A_PRIV]], + // LAMBDA-NEXT: [[INC:%.+]] = add nsw i{{[0-9]+}} [[A_VAL]], 1 + // LAMBDA-NEXT: store i{{[0-9]+}} [[INC]], i{{[0-9]+}}* [[A_PRIV]], + // LAMBDA-NEXT: [[B_VAL:%.+]] = load i{{[0-9]+}}, i{{[0-9]+}}* [[B_PRIV]], + // LAMBDA-NEXT: [[DEC:%.+]] = add nsw i{{[0-9]+}} [[B_VAL]], -1 + // LAMBDA-NEXT: store i{{[0-9]+}} [[DEC]], i{{[0-9]+}}* [[B_PRIV]], + // LAMBDA-NEXT: [[C_PRIV:%.+]] = load i{{[0-9]+}}*, i{{[0-9]+}}** [[REFC]], + // LAMBDA-NEXT: [[C_VAL:%.+]] = load i{{[0-9]+}}, i{{[0-9]+}}* [[C_PRIV]], + // LAMBDA-NEXT: [[DIV:%.+]] = sdiv i{{[0-9]+}} [[C_VAL]], 1 + // LAMBDA-NEXT: store i{{[0-9]+}} [[DIV]], i{{[0-9]+}}* [[C_PRIV]], + // LAMBDA: call void @__kmpc_for_static_fini( + // LAMBDA: br i1 + // LAMBDA: br label + // LAMBDA: ret void + // LAMBDA: define{{.*}} internal{{.*}} void [[OMP_REGION]](i32* noalias %{{.+}}, i32* noalias %{{.+}}) // LAMBDA: alloca i{{[0-9]+}}, // LAMBDA: [[G_START_ADDR:%.+]] = alloca i{{[0-9]+}}, @@ -96,6 +224,7 @@ int main() { #elif defined(BLOCKS) // BLOCKS: [[G:@.+]] = global i{{[0-9]+}} 1212, // BLOCKS-LABEL: @main + // BLOCKS: call // BLOCKS: call void {{%.+}}(i8 ^{ // BLOCKS: define{{.*}} internal{{.*}} void {{.+}}(i8* @@ -146,6 +275,60 @@ int main() { } }(); return 0; +// BLOCKS: define {{.+}} @{{.+}}([[SS_TY]]* +// BLOCKS: getelementptr inbounds [[SS_TY]], [[SS_TY]]* %{{.+}}, i32 0, i32 0 +// BLOCKS: store i{{[0-9]+}} 0, i{{[0-9]+}}* % +// BLOCKS: getelementptr inbounds [[SS_TY]], [[SS_TY]]* %{{.+}}, i32 0, i32 1 +// BLOCKS: store i8 +// BLOCKS: getelementptr inbounds [[SS_TY]], [[SS_TY]]* %{{.+}}, i32 0, i32 2 +// BLOCKS: call void (%{{.+}}*, i{{[0-9]+}}, void (i{{[0-9]+}}*, i{{[0-9]+}}*, ...)*, ...) @__kmpc_fork_call(%{{.+}}* @{{.+}}, i{{[0-9]+}} 1, void (i{{[0-9]+}}*, i{{[0-9]+}}*, ...)* bitcast (void (i{{[0-9]+}}*, i{{[0-9]+}}*, [[SS_TY]]*)* [[SS_MICROTASK:@.+]] to void +// BLOCKS: ret + +// BLOCKS: define internal void [[SS_MICROTASK]](i{{[0-9]+}}* noalias [[GTID_ADDR:%.+]], i{{[0-9]+}}* noalias %{{.+}}, [[SS_TY]]* %{{.+}}) +// BLOCKS: getelementptr {{.*}}[[SS_TY]], [[SS_TY]]* %{{.*}}, i32 0, i32 0 +// BLOCKS-NOT: getelementptr {{.*}}[[SS_TY]], [[SS_TY]]* %{{.*}}, i32 0, i32 1 +// BLOCKS: getelementptr {{.*}}[[SS_TY]], [[SS_TY]]* %{{.*}}, i32 0, i32 2 +// BLOCKS: call void @__kmpc_for_static_init_4( +// BLOCKS-NOT: getelementptr {{.*}}[[SS_TY]], [[SS_TY]]* +// BLOCKS: call{{.*}} void +// BLOCKS: call void @__kmpc_for_static_fini( +// BLOCKS: br i1 +// BLOCKS: [[B_REF:%.+]] = getelementptr {{.*}}[[SS_TY]], [[SS_TY]]* %{{.*}}, i32 0, i32 1 +// BLOCKS: store i8 %{{.+}}, i8* [[B_REF]], +// BLOCKS: br label +// BLOCKS: ret void + +// BLOCKS: define internal void @{{.+}}(i{{[0-9]+}}* noalias [[GTID_ADDR:%.+]], i{{[0-9]+}}* noalias %{{.+}}, [[SS_TY]]* %{{.+}}, i32* {{.+}}, i32* {{.+}}, i32* {{.+}}) +// BLOCKS: alloca i{{[0-9]+}}, +// BLOCKS: alloca i{{[0-9]+}}, +// BLOCKS: alloca i{{[0-9]+}}, +// BLOCKS: alloca i{{[0-9]+}}, +// BLOCKS: alloca i{{[0-9]+}}, +// BLOCKS: alloca i{{[0-9]+}}, +// BLOCKS: alloca i{{[0-9]+}}, +// BLOCKS: alloca i{{[0-9]+}}, +// BLOCKS: alloca i{{[0-9]+}}, +// BLOCKS: [[A_PRIV:%.+]] = alloca i{{[0-9]+}}, +// BLOCKS: [[B_PRIV:%.+]] = alloca i{{[0-9]+}}, +// BLOCKS: [[C_PRIV:%.+]] = alloca i{{[0-9]+}}, +// BLOCKS: store i{{[0-9]+}}* [[A_PRIV]], i{{[0-9]+}}** [[REFA:%.+]], +// BLOCKS: store i{{[0-9]+}}* [[C_PRIV]], i{{[0-9]+}}** [[REFC:%.+]], +// BLOCKS: call void @__kmpc_for_static_init_4( +// BLOCKS: [[A_PRIV:%.+]] = load i{{[0-9]+}}*, i{{[0-9]+}}** [[REFA]], +// BLOCKS-NEXT: [[A_VAL:%.+]] = load i{{[0-9]+}}, i{{[0-9]+}}* [[A_PRIV]], +// BLOCKS-NEXT: [[INC:%.+]] = add nsw i{{[0-9]+}} [[A_VAL]], 1 +// BLOCKS-NEXT: store i{{[0-9]+}} [[INC]], i{{[0-9]+}}* [[A_PRIV]], +// BLOCKS-NEXT: [[B_VAL:%.+]] = load i{{[0-9]+}}, i{{[0-9]+}}* [[B_PRIV]], +// BLOCKS-NEXT: [[DEC:%.+]] = add nsw i{{[0-9]+}} [[B_VAL]], -1 +// BLOCKS-NEXT: store i{{[0-9]+}} [[DEC]], i{{[0-9]+}}* [[B_PRIV]], +// BLOCKS-NEXT: [[C_PRIV:%.+]] = load i{{[0-9]+}}*, i{{[0-9]+}}** [[REFC]], +// BLOCKS-NEXT: [[C_VAL:%.+]] = load i{{[0-9]+}}, i{{[0-9]+}}* [[C_PRIV]], +// BLOCKS-NEXT: [[DIV:%.+]] = sdiv i{{[0-9]+}} [[C_VAL]], 1 +// BLOCKS-NEXT: store i{{[0-9]+}} [[DIV]], i{{[0-9]+}}* [[C_PRIV]], +// BLOCKS: call void @__kmpc_for_static_fini( +// BLOCKS: br i1 +// BLOCKS: br label +// BLOCKS: ret void #else S test; float *pvar = &test.f; @@ -216,7 +399,51 @@ int main() { // CHECK: call void (%{{.+}}*, i{{[0-9]+}}, void (i{{[0-9]+}}*, i{{[0-9]+}}*, ...)*, ...) @__kmpc_fork_call(%{{.+}}* @{{.+}}, i{{[0-9]+}} 2, void (i{{[0-9]+}}*, i{{[0-9]+}}*, ...)* bitcast (void (i{{[0-9]+}}*, i{{[0-9]+}}*, i32**, i32*)* [[TMAIN_MICROTASK:@.+]] to void // CHECK: call void [[S_INT_TY_DESTR:@.+]]([[S_INT_TY]]* // CHECK: ret -// + +// CHECK: define {{.+}} @{{.+}}([[SS_TY]]* +// CHECK: getelementptr inbounds [[SS_TY]], [[SS_TY]]* %{{.+}}, i32 0, i32 0 +// CHECK: store i{{[0-9]+}} 0, i{{[0-9]+}}* % +// CHECK: getelementptr inbounds [[SS_TY]], [[SS_TY]]* %{{.+}}, i32 0, i32 1 +// CHECK: store i8 +// CHECK: getelementptr inbounds [[SS_TY]], [[SS_TY]]* %{{.+}}, i32 0, i32 2 +// CHECK: call void (%{{.+}}*, i{{[0-9]+}}, void (i{{[0-9]+}}*, i{{[0-9]+}}*, ...)*, ...) @__kmpc_fork_call(%{{.+}}* @{{.+}}, i{{[0-9]+}} 1, void (i{{[0-9]+}}*, i{{[0-9]+}}*, ...)* bitcast (void (i{{[0-9]+}}*, i{{[0-9]+}}*, [[SS_TY]]*)* [[SS_MICROTASK:@.+]] to void +// CHECK: ret + +// CHECK: define internal void [[SS_MICROTASK]](i{{[0-9]+}}* noalias [[GTID_ADDR:%.+]], i{{[0-9]+}}* noalias %{{.+}}, [[SS_TY]]* %{{.+}}) +// CHECK: alloca i{{[0-9]+}}, +// CHECK: alloca i{{[0-9]+}}, +// CHECK: alloca i{{[0-9]+}}, +// CHECK: alloca i{{[0-9]+}}, +// CHECK: alloca i{{[0-9]+}}, +// CHECK: alloca i{{[0-9]+}}, +// CHECK: alloca i{{[0-9]+}}, +// CHECK: alloca i{{[0-9]+}}, +// CHECK: alloca i{{[0-9]+}}, +// CHECK: alloca i{{[0-9]+}}, +// CHECK: [[A_PRIV:%.+]] = alloca i{{[0-9]+}}, +// CHECK: [[B_PRIV:%.+]] = alloca i{{[0-9]+}}, +// CHECK: [[C_PRIV:%.+]] = alloca i{{[0-9]+}}, +// CHECK: store i{{[0-9]+}}* [[A_PRIV]], i{{[0-9]+}}** [[REFA:%.+]], +// CHECK: store i{{[0-9]+}}* [[C_PRIV]], i{{[0-9]+}}** [[REFC:%.+]], +// CHECK: call void @__kmpc_for_static_init_4( +// CHECK: [[A_PRIV:%.+]] = load i{{[0-9]+}}*, i{{[0-9]+}}** [[REFA]], +// CHECK-NEXT: [[A_VAL:%.+]] = load i{{[0-9]+}}, i{{[0-9]+}}* [[A_PRIV]], +// CHECK-NEXT: [[INC:%.+]] = add nsw i{{[0-9]+}} [[A_VAL]], 1 +// CHECK-NEXT: store i{{[0-9]+}} [[INC]], i{{[0-9]+}}* [[A_PRIV]], +// CHECK-NEXT: [[B_VAL:%.+]] = load i{{[0-9]+}}, i{{[0-9]+}}* [[B_PRIV]], +// CHECK-NEXT: [[DEC:%.+]] = add nsw i{{[0-9]+}} [[B_VAL]], -1 +// CHECK-NEXT: store i{{[0-9]+}} [[DEC]], i{{[0-9]+}}* [[B_PRIV]], +// CHECK-NEXT: [[C_PRIV:%.+]] = load i{{[0-9]+}}*, i{{[0-9]+}}** [[REFC]], +// CHECK-NEXT: [[C_VAL:%.+]] = load i{{[0-9]+}}, i{{[0-9]+}}* [[C_PRIV]], +// CHECK-NEXT: [[DIV:%.+]] = sdiv i{{[0-9]+}} [[C_VAL]], 1 +// CHECK-NEXT: store i{{[0-9]+}} [[DIV]], i{{[0-9]+}}* [[C_PRIV]], +// CHECK: call void @__kmpc_for_static_fini( +// CHECK: br i1 +// CHECK: [[B_REF:%.+]] = getelementptr {{.*}}[[SS_TY]], [[SS_TY]]* %{{.*}}, i32 0, i32 1 +// CHECK: store i8 %{{.+}}, i8* [[B_REF]], +// CHECK: br label +// CHECK: ret void + // CHECK: define internal void [[TMAIN_MICROTASK]](i{{[0-9]+}}* noalias [[GTID_ADDR:%.+]], i{{[0-9]+}}* noalias %{{.+}}, i32** dereferenceable(8) %{{.+}}, i32* dereferenceable(4) %{{.+}}) // CHECK: alloca i{{[0-9]+}}, // CHECK: [[PVAR_START:%.+]] = alloca i32*, diff --git a/test/OpenMP/for_simd_codegen.cpp b/test/OpenMP/for_simd_codegen.cpp index e1aa892641..7c012f0279 100644 --- a/test/OpenMP/for_simd_codegen.cpp +++ b/test/OpenMP/for_simd_codegen.cpp @@ -581,7 +581,8 @@ void collapsed(float *a, float *b, float *c, float *d) { } // i,j,l are updated; k is not updated. // CHECK: call void @__kmpc_for_static_fini(%ident_t* {{.+}}, i32 %{{.+}}) -// CHECK-NEXT: store i32 3, i32* [[I:%[^,]+]] +// CHECK: br i1 +// CHECK: store i32 3, i32* [[I:%[^,]+]] // CHECK-NEXT: store i32 5, i32* [[I:%[^,]+]] // CHECK-NEXT: store i16 9, i16* [[I:%[^,]+]] // CHECK: call void @__kmpc_barrier(%ident_t* {{.+}}, i32 %{{.+}})