From d11a98c322f59af36bb74fb90870fc52148d092a Mon Sep 17 00:00:00 2001 From: Alexey Bataev Date: Thu, 10 Sep 2015 09:48:30 +0000 Subject: [PATCH] [OPENMP] Propagate alignment from original variables to the private copies. Currently private copies of captured variables have default alignment. Patch makes private variables to have same alignment as original variables. git-svn-id: https://llvm.org/svn/llvm-project/cfe/trunk@247260 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Sema/SemaOpenMP.cpp | 54 +++++++++++++------ test/OpenMP/for_lastprivate_codegen.cpp | 22 ++++---- test/OpenMP/parallel_copyin_codegen.cpp | 32 +++++------ test/OpenMP/parallel_firstprivate_codegen.cpp | 26 ++++----- test/OpenMP/parallel_private_codegen.cpp | 22 ++++---- test/OpenMP/parallel_reduction_codegen.cpp | 24 ++++----- 6 files changed, 100 insertions(+), 80 deletions(-) diff --git a/lib/Sema/SemaOpenMP.cpp b/lib/Sema/SemaOpenMP.cpp index 1a7c7760ef..ebfb136598 100644 --- a/lib/Sema/SemaOpenMP.cpp +++ b/lib/Sema/SemaOpenMP.cpp @@ -452,12 +452,17 @@ bool DSAStackTy::isOpenMPLocal(VarDecl *D, StackTy::reverse_iterator Iter) { /// \brief Build a variable declaration for OpenMP loop iteration variable. static VarDecl *buildVarDecl(Sema &SemaRef, SourceLocation Loc, QualType Type, - StringRef Name) { + StringRef Name, const AttrVec *Attrs = nullptr) { DeclContext *DC = SemaRef.CurContext; IdentifierInfo *II = &SemaRef.PP.getIdentifierTable().get(Name); TypeSourceInfo *TInfo = SemaRef.Context.getTrivialTypeSourceInfo(Type, Loc); VarDecl *Decl = VarDecl::Create(SemaRef.Context, DC, Loc, Loc, II, Type, TInfo, SC_None); + if (Attrs) { + for (specific_attr_iterator I(Attrs->begin()), E(Attrs->end()); + I != E; ++I) + Decl->addAttr(*I); + } Decl->setImplicit(); return Decl; } @@ -723,9 +728,9 @@ void Sema::EndOpenMPDSABlock(Stmt *CurDirective) { // by the address of the new private variable in CodeGen. This new // variable is not added to IdResolver, so the code in the OpenMP // region uses original variable for proper diagnostics. - auto *VDPrivate = - buildVarDecl(*this, DE->getExprLoc(), Type.getUnqualifiedType(), - VD->getName()); + auto *VDPrivate = buildVarDecl( + *this, DE->getExprLoc(), Type.getUnqualifiedType(), + VD->getName(), VD->hasAttrs() ? &VD->getAttrs() : nullptr); ActOnUninitializedDecl(VDPrivate, /*TypeMayContainAuto=*/false); if (VDPrivate->isInvalidDecl()) continue; @@ -2732,6 +2737,8 @@ public: NewVD->setPreviousDeclInSameBlockScope( VD->isPreviousDeclInSameBlockScope()); VD->getDeclContext()->addHiddenDecl(NewVD); + if (VD->hasAttrs()) + NewVD->setAttrs(VD->getAttrs()); transformedLocalDecl(VD, NewVD); return NewVD; } @@ -2911,7 +2918,9 @@ Expr *OpenMPIterationSpaceChecker::BuildCounterVar() const { Expr *OpenMPIterationSpaceChecker::BuildPrivateCounterVar() const { if (Var && !Var->isInvalidDecl()) { auto Type = Var->getType().getNonReferenceType(); - auto *PrivateVar = buildVarDecl(SemaRef, DefaultLoc, Type, Var->getName()); + auto *PrivateVar = + buildVarDecl(SemaRef, DefaultLoc, Type, Var->getName(), + Var->hasAttrs() ? &Var->getAttrs() : nullptr); if (PrivateVar->isInvalidDecl()) return nullptr; return buildDeclRefExpr(SemaRef, PrivateVar, Type, DefaultLoc); @@ -5658,7 +5667,8 @@ OMPClause *Sema::ActOnOpenMPPrivateClause(ArrayRef VarList, // IdResolver, so the code in the OpenMP region uses original variable for // proper diagnostics. Type = Type.getUnqualifiedType(); - auto VDPrivate = buildVarDecl(*this, DE->getExprLoc(), Type, VD->getName()); + auto VDPrivate = buildVarDecl(*this, DE->getExprLoc(), Type, VD->getName(), + VD->hasAttrs() ? &VD->getAttrs() : nullptr); ActOnUninitializedDecl(VDPrivate, /*TypeMayContainAuto=*/false); if (VDPrivate->isInvalidDecl()) continue; @@ -5861,7 +5871,8 @@ OMPClause *Sema::ActOnOpenMPFirstprivateClause(ArrayRef VarList, } Type = Type.getUnqualifiedType(); - auto VDPrivate = buildVarDecl(*this, ELoc, Type, VD->getName()); + auto VDPrivate = buildVarDecl(*this, ELoc, Type, VD->getName(), + VD->hasAttrs() ? &VD->getAttrs() : nullptr); // Generate helper private variable and initialize it with the value of the // original variable. The address of the original variable is replaced by // the address of the new private variable in the CodeGen. This new variable @@ -6017,11 +6028,13 @@ OMPClause *Sema::ActOnOpenMPLastprivateClause(ArrayRef VarList, // operator for the class type. Type = Context.getBaseElementType(Type).getNonReferenceType(); auto *SrcVD = buildVarDecl(*this, DE->getLocStart(), - Type.getUnqualifiedType(), ".lastprivate.src"); + Type.getUnqualifiedType(), ".lastprivate.src", + VD->hasAttrs() ? &VD->getAttrs() : nullptr); auto *PseudoSrcExpr = buildDeclRefExpr( *this, SrcVD, Type.getUnqualifiedType(), DE->getExprLoc()); auto *DstVD = - buildVarDecl(*this, DE->getLocStart(), Type, ".lastprivate.dst"); + buildVarDecl(*this, DE->getLocStart(), Type, ".lastprivate.dst", + VD->hasAttrs() ? &VD->getAttrs() : nullptr); auto *PseudoDstExpr = buildDeclRefExpr(*this, DstVD, Type, DE->getExprLoc()); // For arrays generate assignment operation for single element and replace @@ -6403,8 +6416,10 @@ OMPClause *Sema::ActOnOpenMPReductionClause( } } Type = Type.getNonLValueExprType(Context).getUnqualifiedType(); - auto *LHSVD = buildVarDecl(*this, ELoc, Type, ".reduction.lhs"); - auto *RHSVD = buildVarDecl(*this, ELoc, Type, VD->getName()); + auto *LHSVD = buildVarDecl(*this, ELoc, Type, ".reduction.lhs", + VD->hasAttrs() ? &VD->getAttrs() : nullptr); + auto *RHSVD = buildVarDecl(*this, ELoc, Type, VD->getName(), + VD->hasAttrs() ? &VD->getAttrs() : nullptr); // Add initializer for private variable. Expr *Init = nullptr; switch (BOK) { @@ -6670,7 +6685,8 @@ OMPClause *Sema::ActOnOpenMPLinearClause( } // Build private copy of original var. - auto *Private = buildVarDecl(*this, ELoc, QType, VD->getName()); + auto *Private = buildVarDecl(*this, ELoc, QType, VD->getName(), + VD->hasAttrs() ? &VD->getAttrs() : nullptr); auto *PrivateRef = buildDeclRefExpr( *this, Private, DE->getType().getUnqualifiedType(), DE->getExprLoc()); // Build var to save initial value. @@ -6922,12 +6938,14 @@ OMPClause *Sema::ActOnOpenMPCopyinClause(ArrayRef VarList, // copyin clause requires an accessible, unambiguous copy assignment // operator for the class type. auto ElemType = Context.getBaseElementType(Type).getNonReferenceType(); - auto *SrcVD = buildVarDecl(*this, DE->getLocStart(), - ElemType.getUnqualifiedType(), ".copyin.src"); + auto *SrcVD = + buildVarDecl(*this, DE->getLocStart(), ElemType.getUnqualifiedType(), + ".copyin.src", VD->hasAttrs() ? &VD->getAttrs() : nullptr); auto *PseudoSrcExpr = buildDeclRefExpr( *this, SrcVD, ElemType.getUnqualifiedType(), DE->getExprLoc()); auto *DstVD = - buildVarDecl(*this, DE->getLocStart(), ElemType, ".copyin.dst"); + buildVarDecl(*this, DE->getLocStart(), ElemType, ".copyin.dst", + VD->hasAttrs() ? &VD->getAttrs() : nullptr); auto *PseudoDstExpr = buildDeclRefExpr(*this, DstVD, ElemType, DE->getExprLoc()); // For arrays generate assignment operation for single element and replace @@ -7047,11 +7065,13 @@ OMPClause *Sema::ActOnOpenMPCopyprivateClause(ArrayRef VarList, Type = Context.getBaseElementType(Type.getNonReferenceType()) .getUnqualifiedType(); auto *SrcVD = - buildVarDecl(*this, DE->getLocStart(), Type, ".copyprivate.src"); + buildVarDecl(*this, DE->getLocStart(), Type, ".copyprivate.src", + VD->hasAttrs() ? &VD->getAttrs() : nullptr); auto *PseudoSrcExpr = buildDeclRefExpr(*this, SrcVD, Type, DE->getExprLoc()); auto *DstVD = - buildVarDecl(*this, DE->getLocStart(), Type, ".copyprivate.dst"); + buildVarDecl(*this, DE->getLocStart(), Type, ".copyprivate.dst", + VD->hasAttrs() ? &VD->getAttrs() : nullptr); auto *PseudoDstExpr = buildDeclRefExpr(*this, DstVD, Type, DE->getExprLoc()); auto AssignmentOp = BuildBinOp(/*S=*/nullptr, DE->getExprLoc(), BO_Assign, diff --git a/test/OpenMP/for_lastprivate_codegen.cpp b/test/OpenMP/for_lastprivate_codegen.cpp index cada3a06e5..9cfb73fd09 100644 --- a/test/OpenMP/for_lastprivate_codegen.cpp +++ b/test/OpenMP/for_lastprivate_codegen.cpp @@ -18,7 +18,7 @@ struct S { ~S() {} }; -volatile int g = 1212; +volatile int g __attribute__((aligned(128)))= 1212; volatile int &g1 = g; float f; char cnt; @@ -32,10 +32,10 @@ char cnt; template T tmain() { S test; - T t_var = T(); - T vec[] = {1, 2}; - S s_arr[] = {1, 2}; - S &var = test; + T t_var __attribute__((aligned(128))) = T(); + T vec[] __attribute__((aligned(128))) = {1, 2}; + S s_arr[] __attribute__((aligned(128))) = {1, 2}; + S &var __attribute__((aligned(128))) = test; #pragma omp parallel #pragma omp for lastprivate(t_var, vec, s_arr, var) for (int i = 0; i < 2; ++i) { @@ -69,7 +69,7 @@ int main() { // LAMBDA: alloca i{{[0-9]+}}, // LAMBDA: alloca i{{[0-9]+}}, // LAMBDA: alloca i{{[0-9]+}}, - // LAMBDA: [[G_PRIVATE_ADDR:%.+]] = alloca i{{[0-9]+}}, + // LAMBDA: [[G_PRIVATE_ADDR:%.+]] = alloca i{{[0-9]+}}, align 128 // LAMBDA: [[GTID_REF:%.+]] = load i{{[0-9]+}}*, i{{[0-9]+}}** %{{.+}} // LAMBDA: [[GTID:%.+]] = load i{{[0-9]+}}, i{{[0-9]+}}* [[GTID_REF]] // LAMBDA: call {{.+}} @__kmpc_for_static_init_4(%{{.+}}* @{{.+}}, i32 [[GTID]], i32 34, i32* [[IS_LAST_ADDR:%.+]], i32* %{{.+}}, i32* %{{.+}}, i32* %{{.+}}, i32 1, i32 1) @@ -122,7 +122,7 @@ int main() { // BLOCKS: alloca i{{[0-9]+}}, // BLOCKS: alloca i{{[0-9]+}}, // BLOCKS: alloca i{{[0-9]+}}, - // BLOCKS: [[G_PRIVATE_ADDR:%.+]] = alloca i{{[0-9]+}}, + // BLOCKS: [[G_PRIVATE_ADDR:%.+]] = alloca i{{[0-9]+}}, align 128 // BLOCKS: [[GTID_REF:%.+]] = load i{{[0-9]+}}*, i{{[0-9]+}}** %{{.+}} // BLOCKS: [[GTID:%.+]] = load i{{[0-9]+}}, i{{[0-9]+}}* [[GTID_REF]] // BLOCKS: call {{.+}} @__kmpc_for_static_init_4(%{{.+}}* @{{.+}}, i32 [[GTID]], i32 34, i32* [[IS_LAST_ADDR:%.+]], i32* %{{.+}}, i32* %{{.+}}, i32* %{{.+}}, i32 1, i32 1) @@ -402,10 +402,10 @@ int main() { // CHECK: alloca i{{[0-9]+}}, // CHECK: alloca i{{[0-9]+}}, // CHECK: alloca i{{[0-9]+}}, -// CHECK: [[T_VAR_PRIV:%.+]] = alloca i{{[0-9]+}}, -// CHECK: [[VEC_PRIV:%.+]] = alloca [2 x i{{[0-9]+}}], -// CHECK: [[S_ARR_PRIV:%.+]] = alloca [2 x [[S_INT_TY]]], -// CHECK: [[VAR_PRIV:%.+]] = alloca [[S_INT_TY]], +// CHECK: [[T_VAR_PRIV:%.+]] = alloca i{{[0-9]+}}, align 128 +// CHECK: [[VEC_PRIV:%.+]] = alloca [2 x i{{[0-9]+}}], align 128 +// CHECK: [[S_ARR_PRIV:%.+]] = alloca [2 x [[S_INT_TY]]], align 128 +// CHECK: [[VAR_PRIV:%.+]] = alloca [[S_INT_TY]], align 128 // CHECK: [[VAR_PRIV_REF:%.+]] = alloca [[S_INT_TY]]*, // CHECK: store i{{[0-9]+}}* [[GTID_ADDR]], i{{[0-9]+}}** [[GTID_ADDR_REF:%.+]] diff --git a/test/OpenMP/parallel_copyin_codegen.cpp b/test/OpenMP/parallel_copyin_codegen.cpp index f29fb8b641..e7c69697e2 100644 --- a/test/OpenMP/parallel_copyin_codegen.cpp +++ b/test/OpenMP/parallel_copyin_codegen.cpp @@ -17,7 +17,7 @@ #ifndef HEADER #define HEADER -volatile int g = 1212; +volatile int g __attribute__((aligned(128))) = 1212; #pragma omp threadprivate(g) template @@ -58,10 +58,10 @@ template T tmain() { S test; test = S(); - static T t_var = 333; - static T vec[] = {3, 3}; - static S s_arr[] = {1, 2}; - static S var(3); + static T t_var __attribute__((aligned(128))) = 333; + static T vec[] __attribute__((aligned(128))) = {3, 3}; + static S s_arr[] __attribute__((aligned(128))) = {1, 2}; + static S var __attribute__((aligned(128))) (3); #pragma omp threadprivate(t_var, vec, s_arr, var) #pragma omp parallel copyin(t_var, vec, s_arr, var) { @@ -103,8 +103,8 @@ int main() { // LAMBDA: icmp ne i{{[0-9]+}} ptrtoint (i{{[0-9]+}}* [[G]] to i{{[0-9]+}}), %{{.+}} // LAMBDA: br i1 %{{.+}}, label %[[NOT_MASTER:.+]], label %[[DONE:.+]] // LAMBDA: [[NOT_MASTER]] - // LAMBDA: load i{{[0-9]+}}, i{{[0-9]+}}* [[G]], - // LAMBDA: store volatile i{{[0-9]+}} %{{.+}}, i{{[0-9]+}}* %{{.+}}, + // LAMBDA: load i{{[0-9]+}}, i{{[0-9]+}}* [[G]], align 128 + // LAMBDA: store volatile i{{[0-9]+}} %{{.+}}, i{{[0-9]+}}* %{{.+}}, align 128 // LAMBDA: [[DONE]] // TLS-LAMBDA-DAG: [[G_CAPTURE_SRC:%.+]] = load i{{[0-9]+}}*, i{{[0-9]+}}** % @@ -115,7 +115,7 @@ int main() { // TLS-LAMBDA: br i1 %{{.+}}, label %[[NOT_MASTER:.+]], label %[[DONE:.+]] // TLS-LAMBDA: [[NOT_MASTER]] // TLS-LAMBDA: load i{{[0-9]+}}, i{{[0-9]+}}* [[G_CAPTURE_SRC]], - // TLS-LAMBDA: store volatile i{{[0-9]+}} %{{.+}}, i{{[0-9]+}}* [[G_CAPTURE_DST]], + // TLS-LAMBDA: store volatile i{{[0-9]+}} %{{.+}}, i{{[0-9]+}}* [[G_CAPTURE_DST]], align 128 // TLS-LAMBDA: [[DONE]] // LAMBDA: call {{.*}}i32 @__kmpc_cancel_barrier( @@ -130,7 +130,7 @@ int main() { // LAMBDA: [[ARG_PTR:%.+]] = load %{{.+}}*, %{{.+}}** [[ARG_PTR_REF]] // TLS-LAMBDA: [[G_CAPTURE_DST:%.+]] = call i{{[0-9]+}}* [[G_CTOR]]() - // TLS-LAMBDA: store volatile i{{[0-9]+}} 2, i{{[0-9]+}}* [[G_CAPTURE_DST]] + // TLS-LAMBDA: store volatile i{{[0-9]+}} 2, i{{[0-9]+}}* [[G_CAPTURE_DST]], align 128 }(); } }(); @@ -164,8 +164,8 @@ int main() { // BLOCKS: icmp ne i{{[0-9]+}} ptrtoint (i{{[0-9]+}}* [[G]] to i{{[0-9]+}}), %{{.+}} // BLOCKS: br i1 %{{.+}}, label %[[NOT_MASTER:.+]], label %[[DONE:.+]] // BLOCKS: [[NOT_MASTER]] - // BLOCKS: load i{{[0-9]+}}, i{{[0-9]+}}* [[G]], - // BLOCKS: store volatile i{{[0-9]+}} %{{.+}}, i{{[0-9]+}}* %{{.+}}, + // BLOCKS: load i{{[0-9]+}}, i{{[0-9]+}}* [[G]], align 128 + // BLOCKS: store volatile i{{[0-9]+}} %{{.+}}, i{{[0-9]+}}* %{{.+}}, align 128 // BLOCKS: [[DONE]] // TLS-BLOCKS-DAG: [[G_CAPTURE_SRC:%.+]] = load i{{[0-9]+}}*, i{{[0-9]+}}** % @@ -176,7 +176,7 @@ int main() { // TLS-BLOCKS: br i1 %{{.+}}, label %[[NOT_MASTER:.+]], label %[[DONE:.+]] // TLS-BLOCKS: [[NOT_MASTER]] // TLS-BLOCKS: load i{{[0-9]+}}, i{{[0-9]+}}* [[G_CAPTURE_SRC]], - // TLS-BLOCKS: store volatile i{{[0-9]+}} %{{.+}}, i{{[0-9]+}}* [[G_CAPTURE_DST]], + // TLS-BLOCKS: store volatile i{{[0-9]+}} %{{.+}}, i{{[0-9]+}}* [[G_CAPTURE_DST]], align 128 // TLS-BLOCKS: [[DONE]] // BLOCKS: call {{.*}}i32 @__kmpc_cancel_barrier( @@ -381,8 +381,8 @@ int main() { // CHECK: icmp ne i{{[0-9]+}} ptrtoint (i{{[0-9]+}}* [[TMAIN_T_VAR]] to i{{[0-9]+}}), %{{.+}} // CHECK: br i1 %{{.+}}, label %[[NOT_MASTER:.+]], label %[[DONE:.+]] // CHECK: [[NOT_MASTER]] -// CHECK: load i{{[0-9]+}}, i{{[0-9]+}}* [[TMAIN_T_VAR]], -// CHECK: store i{{[0-9]+}} %{{.+}}, i{{[0-9]+}}* %{{.+}}, +// CHECK: load i{{[0-9]+}}, i{{[0-9]+}}* [[TMAIN_T_VAR]], align 128 +// CHECK: store i{{[0-9]+}} %{{.+}}, i{{[0-9]+}}* %{{.+}}, align 128 // TLS-CHECK: [[MASTER_REF:%.+]] = load i32*, i32** % // TLS-CHECK: [[MASTER_REF1:%.+]] = load [2 x i32]*, [2 x i32]** % @@ -393,8 +393,8 @@ int main() { // TLS-CHECK: icmp ne i{{[0-9]+}} [[MASTER_LONG]], ptrtoint (i{{[0-9]+}}* [[TMAIN_T_VAR]] to i{{[0-9]+}}) // TLS-CHECK: br i1 %{{.+}}, label %[[NOT_MASTER:.+]], label %[[DONE:.+]] // TLS-CHECK: [[NOT_MASTER]] -// TLS-CHECK: [[MASTER_VAL:%.+]] = load i32, i32* [[MASTER_REF]] -// TLS-CHECK: store i32 [[MASTER_VAL]], i32* [[TMAIN_T_VAR]] +// TLS-CHECK: [[MASTER_VAL:%.+]] = load i32, i32* [[MASTER_REF]], +// TLS-CHECK: store i32 [[MASTER_VAL]], i32* [[TMAIN_T_VAR]], align 128 // threadprivate_vec = vec; // CHECK: call {{.*}}i8* @__kmpc_threadprivate_cached({{.+}} [[TMAIN_VEC]] diff --git a/test/OpenMP/parallel_firstprivate_codegen.cpp b/test/OpenMP/parallel_firstprivate_codegen.cpp index 419af7992b..a3f28a930c 100644 --- a/test/OpenMP/parallel_firstprivate_codegen.cpp +++ b/test/OpenMP/parallel_firstprivate_codegen.cpp @@ -16,7 +16,7 @@ struct St { ~St() {} }; -volatile int g = 1212; +volatile int g __attribute__((aligned(128))) = 1212; template struct S { @@ -36,10 +36,10 @@ struct S { template T tmain() { S test; - T t_var = T(); - T vec[] = {1, 2}; - S s_arr[] = {1, 2}; - S var(3); + T t_var __attribute__((aligned(128))) = T(); + T vec[] __attribute__((aligned(128))) = {1, 2}; + S s_arr[] __attribute__((aligned(128))) = {1, 2}; + S var __attribute__((aligned(128))) (3); #pragma omp parallel firstprivate(t_var, vec, s_arr, var) { vec[0] = t_var; @@ -61,7 +61,7 @@ int main() { #pragma omp parallel firstprivate(g) { // LAMBDA: define{{.*}} internal{{.*}} void [[OMP_REGION]](i32* noalias %{{.+}}, i32* noalias %{{.+}}, i32* dereferenceable(4) %{{.+}}) - // LAMBDA: [[G_PRIVATE_ADDR:%.+]] = alloca i{{[0-9]+}}, + // LAMBDA: [[G_PRIVATE_ADDR:%.+]] = alloca i{{[0-9]+}}, align 128 // LAMBDA: [[G_REF:%.+]] = load i{{[0-9]+}}*, i{{[0-9]+}}** [[G_REF_ADDR:%.+]] // LAMBDA: [[G_VAL:%.+]] = load volatile i{{[0-9]+}}, i{{[0-9]+}}* [[G_REF]] // LAMBDA: store i{{[0-9]+}} [[G_VAL]], i{{[0-9]+}}* [[G_PRIVATE_ADDR]] @@ -93,10 +93,10 @@ int main() { #pragma omp parallel firstprivate(g) { // BLOCKS: define{{.*}} internal{{.*}} void [[OMP_REGION]](i32* noalias %{{.+}}, i32* noalias %{{.+}}, i32* dereferenceable(4) %{{.+}}) - // BLOCKS: [[G_PRIVATE_ADDR:%.+]] = alloca i{{[0-9]+}}, + // BLOCKS: [[G_PRIVATE_ADDR:%.+]] = alloca i{{[0-9]+}}, align 128 // BLOCKS: [[G_REF:%.+]] = load i{{[0-9]+}}*, i{{[0-9]+}}** [[G_REF_ADDR:%.+]] - // BLOCKS: [[G_VAL:%.+]] = load volatile i{{[0-9]+}}, i{{[0-9]+}}* [[G_REF]] - // BLOCKS: store i{{[0-9]+}} [[G_VAL]], i{{[0-9]+}}* [[G_PRIVATE_ADDR]] + // BLOCKS: [[G_VAL:%.+]] = load volatile i{{[0-9]+}}, i{{[0-9]+}}* [[G_REF]], + // BLOCKS: store i{{[0-9]+}} [[G_VAL]], i{{[0-9]+}}* [[G_PRIVATE_ADDR]], // BLOCKS: call {{.*}}i32 @__kmpc_cancel_barrier( g = 1; // BLOCKS: store i{{[0-9]+}} 1, i{{[0-9]+}}* [[G_PRIVATE_ADDR]], @@ -185,10 +185,10 @@ int main() { // CHECK: ret // // CHECK: define internal {{.*}}void [[TMAIN_MICROTASK]](i{{[0-9]+}}* noalias [[GTID_ADDR:%.+]], i{{[0-9]+}}* noalias %{{.+}}, [2 x i32]* dereferenceable(8) %{{.+}}, i32* dereferenceable(4) %{{.+}}, [2 x [[S_INT_TY]]]* dereferenceable(8) %{{.+}}, [[S_INT_TY]]* dereferenceable(4) %{{.+}}) -// CHECK: [[T_VAR_PRIV:%.+]] = alloca i{{[0-9]+}}, -// CHECK: [[VEC_PRIV:%.+]] = alloca [2 x i{{[0-9]+}}], -// CHECK: [[S_ARR_PRIV:%.+]] = alloca [2 x [[S_INT_TY]]], -// CHECK: [[VAR_PRIV:%.+]] = alloca [[S_INT_TY]], +// CHECK: [[T_VAR_PRIV:%.+]] = alloca i{{[0-9]+}}, align 128 +// CHECK: [[VEC_PRIV:%.+]] = alloca [2 x i{{[0-9]+}}], align 128 +// CHECK: [[S_ARR_PRIV:%.+]] = alloca [2 x [[S_INT_TY]]], align 128 +// CHECK: [[VAR_PRIV:%.+]] = alloca [[S_INT_TY]], align 128 // CHECK: store i{{[0-9]+}}* [[GTID_ADDR]], i{{[0-9]+}}** [[GTID_ADDR_ADDR:%.+]], // CHECK: [[VEC_REF:%.+]] = load [2 x i{{[0-9]+}}]*, [2 x i{{[0-9]+}}]** % diff --git a/test/OpenMP/parallel_private_codegen.cpp b/test/OpenMP/parallel_private_codegen.cpp index 616d50fae9..ec26c6066c 100644 --- a/test/OpenMP/parallel_private_codegen.cpp +++ b/test/OpenMP/parallel_private_codegen.cpp @@ -16,17 +16,17 @@ struct S { ~S() {} }; -volatile int g = 1212; +volatile int g __attribute__((aligned(128))) = 1212; // CHECK: [[S_FLOAT_TY:%.+]] = type { float } // CHECK: [[S_INT_TY:%.+]] = type { i{{[0-9]+}} } template T tmain() { S test; - T t_var = T(); - T vec[] = {1, 2}; - S s_arr[] = {1, 2}; - S var(3); + T t_var __attribute__((aligned(128))) = T(); + T vec[] __attribute__((aligned(128))) = {1, 2}; + S s_arr[] __attribute__((aligned(128))) = {1, 2}; + S var __attribute__((aligned(128))) (3); #pragma omp parallel private(t_var, vec, s_arr, var) { vec[0] = t_var; @@ -49,7 +49,7 @@ int main() { // LAMBDA: define{{.*}} internal{{.*}} void [[OMP_REGION]](i32* noalias %{{.+}}, i32* noalias %{{.+}}) // LAMBDA: [[G_PRIVATE_ADDR:%.+]] = alloca i{{[0-9]+}}, g = 1; - // LAMBDA: store i{{[0-9]+}} 1, i{{[0-9]+}}* [[G_PRIVATE_ADDR]], + // LAMBDA: store i{{[0-9]+}} 1, i{{[0-9]+}}* [[G_PRIVATE_ADDR]], align 128 // LAMBDA: [[G_PRIVATE_ADDR_REF:%.+]] = getelementptr inbounds %{{.+}}, %{{.+}}* [[ARG:%.+]], i{{[0-9]+}} 0, i{{[0-9]+}} 0 // LAMBDA: store i{{[0-9]+}}* [[G_PRIVATE_ADDR]], i{{[0-9]+}}** [[G_PRIVATE_ADDR_REF]] // LAMBDA: call{{.*}} void [[INNER_LAMBDA:@.+]](%{{.+}}* [[ARG]]) @@ -78,7 +78,7 @@ int main() { // BLOCKS: define{{.*}} internal{{.*}} void [[OMP_REGION]](i32* noalias %{{.+}}, i32* noalias %{{.+}}) // BLOCKS: [[G_PRIVATE_ADDR:%.+]] = alloca i{{[0-9]+}}, g = 1; - // BLOCKS: store i{{[0-9]+}} 1, i{{[0-9]+}}* [[G_PRIVATE_ADDR]], + // BLOCKS: store i{{[0-9]+}} 1, i{{[0-9]+}}* [[G_PRIVATE_ADDR]], align 128 // BLOCKS-NOT: [[G]]{{[[^:word:]]}} // BLOCKS: i{{[0-9]+}}* [[G_PRIVATE_ADDR]] // BLOCKS-NOT: [[G]]{{[[^:word:]]}} @@ -143,10 +143,10 @@ int main() { // CHECK: ret // // CHECK: define internal void [[TMAIN_MICROTASK]](i{{[0-9]+}}* noalias [[GTID_ADDR:%.+]], i{{[0-9]+}}* noalias %{{.+}}) -// CHECK: [[T_VAR_PRIV:%.+]] = alloca i{{[0-9]+}}, -// CHECK: [[VEC_PRIV:%.+]] = alloca [2 x i{{[0-9]+}}], -// CHECK: [[S_ARR_PRIV:%.+]] = alloca [2 x [[S_INT_TY]]], -// CHECK: [[VAR_PRIV:%.+]] = alloca [[S_INT_TY]], +// CHECK: [[T_VAR_PRIV:%.+]] = alloca i{{[0-9]+}}, align 128 +// CHECK: [[VEC_PRIV:%.+]] = alloca [2 x i{{[0-9]+}}], align 128 +// CHECK: [[S_ARR_PRIV:%.+]] = alloca [2 x [[S_INT_TY]]], align 128 +// CHECK: [[VAR_PRIV:%.+]] = alloca [[S_INT_TY]], align 128 // CHECK: store i{{[0-9]+}}* [[GTID_ADDR]], i{{[0-9]+}}** [[GTID_ADDR_REF:%.+]] // CHECK-NOT: [[T_VAR_PRIV]] // CHECK-NOT: [[VEC_PRIV]] diff --git a/test/OpenMP/parallel_reduction_codegen.cpp b/test/OpenMP/parallel_reduction_codegen.cpp index 6ecd42bb94..2ee4239d2a 100644 --- a/test/OpenMP/parallel_reduction_codegen.cpp +++ b/test/OpenMP/parallel_reduction_codegen.cpp @@ -8,7 +8,7 @@ #ifndef HEADER #define HEADER -volatile int g = 1212; +volatile int g __attribute__((aligned(128))) = 1212; template struct S { @@ -30,10 +30,10 @@ template T tmain() { T t; S test; - T t_var = T(), t_var1; + T t_var __attribute__((aligned(128))) = T(), t_var1 __attribute__((aligned(128))); T vec[] = {1, 2}; - S s_arr[] = {1, 2}; - S var(3), var1; + S s_arr[] = {1, 2}; + S var __attribute__((aligned(128))) (3), var1 __attribute__((aligned(128))); #pragma omp parallel reduction(+:t_var) reduction(&:var) reduction(&& : var1) reduction(min: t_var1) { vec[0] = t_var; @@ -59,9 +59,9 @@ int main() { // LAMBDA: [[RED_LIST:%.+]] = alloca [1 x i8*], // LAMBDA: [[G_REF:%.+]] = load i{{[0-9]+}}*, i{{[0-9]+}}** [[G_REF_ADDR:%.+]] - // LAMBDA: store i{{[0-9]+}} 0, i{{[0-9]+}}* [[G_PRIVATE_ADDR]] + // LAMBDA: store i{{[0-9]+}} 0, i{{[0-9]+}}* [[G_PRIVATE_ADDR]], align 128 g = 1; - // LAMBDA: store i{{[0-9]+}} 1, i{{[0-9]+}}* [[G_PRIVATE_ADDR]], + // LAMBDA: store i{{[0-9]+}} 1, i{{[0-9]+}}* [[G_PRIVATE_ADDR]], align 128 // LAMBDA: [[G_PRIVATE_ADDR_REF:%.+]] = getelementptr inbounds %{{.+}}, %{{.+}}* [[ARG:%.+]], i{{[0-9]+}} 0, i{{[0-9]+}} 0 // LAMBDA: store i{{[0-9]+}}* [[G_PRIVATE_ADDR]], i{{[0-9]+}}** [[G_PRIVATE_ADDR_REF]] // LAMBDA: call void [[INNER_LAMBDA:@.+]](%{{.+}}* [[ARG]]) @@ -114,9 +114,9 @@ int main() { // BLOCKS: [[RED_LIST:%.+]] = alloca [1 x i8*], // BLOCKS: [[G_REF:%.+]] = load i{{[0-9]+}}*, i{{[0-9]+}}** [[G_REF_ADDR:%.+]] - // BLOCKS: store i{{[0-9]+}} 0, i{{[0-9]+}}* [[G_PRIVATE_ADDR]] + // BLOCKS: store i{{[0-9]+}} 0, i{{[0-9]+}}* [[G_PRIVATE_ADDR]], align 128 g = 1; - // BLOCKS: store i{{[0-9]+}} 1, i{{[0-9]+}}* [[G_PRIVATE_ADDR]], + // BLOCKS: store i{{[0-9]+}} 1, i{{[0-9]+}}* [[G_PRIVATE_ADDR]], align 128 // BLOCKS-NOT: [[G]]{{[[^:word:]]}} // BLOCKS: i{{[0-9]+}}* [[G_PRIVATE_ADDR]] // BLOCKS-NOT: [[G]]{{[[^:word:]]}} @@ -435,10 +435,10 @@ int main() { // CHECK: ret // // CHECK: define internal void [[TMAIN_MICROTASK]](i{{[0-9]+}}* noalias [[GTID_ADDR:%.+]], i{{[0-9]+}}* noalias %{{.+}}, -// CHECK: [[T_VAR_PRIV:%.+]] = alloca i{{[0-9]+}}, -// CHECK: [[VAR_PRIV:%.+]] = alloca [[S_INT_TY]], -// CHECK: [[VAR1_PRIV:%.+]] = alloca [[S_INT_TY]], -// CHECK: [[T_VAR1_PRIV:%.+]] = alloca i{{[0-9]+}}, +// CHECK: [[T_VAR_PRIV:%.+]] = alloca i{{[0-9]+}}, align 128 +// CHECK: [[VAR_PRIV:%.+]] = alloca [[S_INT_TY]], align 128 +// CHECK: [[VAR1_PRIV:%.+]] = alloca [[S_INT_TY]], align 128 +// CHECK: [[T_VAR1_PRIV:%.+]] = alloca i{{[0-9]+}}, align 128 // Reduction list for runtime. // CHECK: [[RED_LIST:%.+]] = alloca [4 x i8*], -- 2.40.0