From 706a4b82124306be1f8927978e8c44348ada66ea Mon Sep 17 00:00:00 2001 From: Alexey Bataev Date: Tue, 16 Jun 2015 11:59:36 +0000 Subject: [PATCH] [OPENMP] Remove last iteration separation for loop-based constructs. Previously the last iteration for simd loop-based OpenMP constructs was generated as separate code. This feature is not required, so it is removed and codegen is simplified. git-svn-id: https://llvm.org/svn/llvm-project/cfe/trunk@239810 91177308-0d34-0410-b5e6-96231b3b80d8 --- include/clang/AST/StmtOpenMP.h | 39 ++++++++++++----------------- lib/AST/Stmt.cpp | 10 ++++---- lib/CodeGen/CGStmtOpenMP.cpp | 20 +++++---------- lib/CodeGen/CodeGenFunction.h | 3 +-- lib/Sema/SemaOpenMP.cpp | 4 --- lib/Serialization/ASTReaderStmt.cpp | 4 +-- lib/Serialization/ASTWriterStmt.cpp | 3 +-- test/OpenMP/simd_codegen.cpp | 13 ++-------- 8 files changed, 32 insertions(+), 64 deletions(-) diff --git a/include/clang/AST/StmtOpenMP.h b/include/clang/AST/StmtOpenMP.h index 5161eff099..01c81a05ad 100644 --- a/include/clang/AST/StmtOpenMP.h +++ b/include/clang/AST/StmtOpenMP.h @@ -285,24 +285,23 @@ class OMPLoopDirective : public OMPExecutableDirective { CalcLastIterationOffset = 3, PreConditionOffset = 4, CondOffset = 5, - SeparatedCondOffset = 6, - InitOffset = 7, - IncOffset = 8, + InitOffset = 6, + IncOffset = 7, // The '...End' enumerators do not correspond to child expressions - they // specify the offset to the end (and start of the following counters/ // updates/finals arrays). - DefaultEnd = 9, + DefaultEnd = 8, // The following 7 exprs are used by worksharing loops only. 
- IsLastIterVariableOffset = 9, - LowerBoundVariableOffset = 10, - UpperBoundVariableOffset = 11, - StrideVariableOffset = 12, - EnsureUpperBoundOffset = 13, - NextLowerBoundOffset = 14, - NextUpperBoundOffset = 15, + IsLastIterVariableOffset = 8, + LowerBoundVariableOffset = 9, + UpperBoundVariableOffset = 10, + StrideVariableOffset = 11, + EnsureUpperBoundOffset = 12, + NextLowerBoundOffset = 13, + NextUpperBoundOffset = 14, // Offset to the end (and start of the following counters/updates/finals // arrays) for worksharing loop directives. - WorksharingEnd = 16, + WorksharingEnd = 15, }; /// \brief Get the counters storage. @@ -374,9 +373,8 @@ protected: void setPreCond(Expr *PC) { *std::next(child_begin(), PreConditionOffset) = PC; } - void setCond(Expr *Cond, Expr *SeparatedCond) { + void setCond(Expr *Cond) { *std::next(child_begin(), CondOffset) = Cond; - *std::next(child_begin(), SeparatedCondOffset) = SeparatedCond; } void setInit(Expr *Init) { *std::next(child_begin(), InitOffset) = Init; } void setInc(Expr *Inc) { *std::next(child_begin(), IncOffset) = Inc; } @@ -435,8 +433,6 @@ public: Expr *PreCond; /// \brief Loop condition. Expr *Cond; - /// \brief A condition with 1 iteration separated. - Expr *SeparatedCond; /// \brief Loop iteration variable init. Expr *Init; /// \brief Loop increment. @@ -467,8 +463,7 @@ public: bool builtAll() { return IterationVarRef != nullptr && LastIteration != nullptr && NumIterations != nullptr && PreCond != nullptr && - Cond != nullptr && SeparatedCond != nullptr && Init != nullptr && - Inc != nullptr; + Cond != nullptr && Init != nullptr && Inc != nullptr; } /// \brief Initialize all the fields to null. 
@@ -479,7 +474,6 @@ public: CalcLastIteration = nullptr; PreCond = nullptr; Cond = nullptr; - SeparatedCond = nullptr; Init = nullptr; Inc = nullptr; IL = nullptr; @@ -519,10 +513,9 @@ public: return const_cast(reinterpret_cast( *std::next(child_begin(), PreConditionOffset))); } - Expr *getCond(bool SeparateIter) const { - return const_cast(reinterpret_cast( - *std::next(child_begin(), - (SeparateIter ? SeparatedCondOffset : CondOffset)))); + Expr *getCond() const { + return const_cast( + reinterpret_cast(*std::next(child_begin(), CondOffset))); } Expr *getInit() const { return const_cast( diff --git a/lib/AST/Stmt.cpp b/lib/AST/Stmt.cpp index 09bb17b0e7..d2a6a9c7ee 100644 --- a/lib/AST/Stmt.cpp +++ b/lib/AST/Stmt.cpp @@ -1636,7 +1636,7 @@ OMPSimdDirective::Create(const ASTContext &C, SourceLocation StartLoc, Dir->setLastIteration(Exprs.LastIteration); Dir->setCalcLastIteration(Exprs.CalcLastIteration); Dir->setPreCond(Exprs.PreCond); - Dir->setCond(Exprs.Cond, Exprs.SeparatedCond); + Dir->setCond(Exprs.Cond); Dir->setInit(Exprs.Init); Dir->setInc(Exprs.Inc); Dir->setCounters(Exprs.Counters); @@ -1675,7 +1675,7 @@ OMPForDirective::Create(const ASTContext &C, SourceLocation StartLoc, Dir->setLastIteration(Exprs.LastIteration); Dir->setCalcLastIteration(Exprs.CalcLastIteration); Dir->setPreCond(Exprs.PreCond); - Dir->setCond(Exprs.Cond, Exprs.SeparatedCond); + Dir->setCond(Exprs.Cond); Dir->setInit(Exprs.Init); Dir->setInc(Exprs.Inc); Dir->setIsLastIterVariable(Exprs.IL); @@ -1721,7 +1721,7 @@ OMPForSimdDirective::Create(const ASTContext &C, SourceLocation StartLoc, Dir->setLastIteration(Exprs.LastIteration); Dir->setCalcLastIteration(Exprs.CalcLastIteration); Dir->setPreCond(Exprs.PreCond); - Dir->setCond(Exprs.Cond, Exprs.SeparatedCond); + Dir->setCond(Exprs.Cond); Dir->setInit(Exprs.Init); Dir->setInc(Exprs.Inc); Dir->setIsLastIterVariable(Exprs.IL); @@ -1876,7 +1876,7 @@ OMPParallelForDirective *OMPParallelForDirective::Create( 
Dir->setLastIteration(Exprs.LastIteration); Dir->setCalcLastIteration(Exprs.CalcLastIteration); Dir->setPreCond(Exprs.PreCond); - Dir->setCond(Exprs.Cond, Exprs.SeparatedCond); + Dir->setCond(Exprs.Cond); Dir->setInit(Exprs.Init); Dir->setInc(Exprs.Inc); Dir->setIsLastIterVariable(Exprs.IL); @@ -1920,7 +1920,7 @@ OMPParallelForSimdDirective *OMPParallelForSimdDirective::Create( Dir->setLastIteration(Exprs.LastIteration); Dir->setCalcLastIteration(Exprs.CalcLastIteration); Dir->setPreCond(Exprs.PreCond); - Dir->setCond(Exprs.Cond, Exprs.SeparatedCond); + Dir->setCond(Exprs.Cond); Dir->setInit(Exprs.Init); Dir->setInc(Exprs.Inc); Dir->setIsLastIterVariable(Exprs.IL); diff --git a/lib/CodeGen/CGStmtOpenMP.cpp b/lib/CodeGen/CGStmtOpenMP.cpp index 895baa72a7..a6da3891af 100644 --- a/lib/CodeGen/CGStmtOpenMP.cpp +++ b/lib/CodeGen/CGStmtOpenMP.cpp @@ -489,8 +489,7 @@ void CodeGenFunction::EmitOMPParallelDirective(const OMPParallelDirective &S) { emitCommonOMPParallelDirective(*this, S, CodeGen); } -void CodeGenFunction::EmitOMPLoopBody(const OMPLoopDirective &S, - bool SeparateIter) { +void CodeGenFunction::EmitOMPLoopBody(const OMPLoopDirective &S) { RunCleanupsScope BodyScope(*this); // Update counters values on current iteration. for (auto I : S.updates()) { @@ -512,12 +511,10 @@ void CodeGenFunction::EmitOMPLoopBody(const OMPLoopDirective &S, // The end (updates/cleanups). EmitBlock(Continue.getBlock()); BreakContinueStack.pop_back(); - if (SeparateIter) { // TODO: Update lastprivates if the SeparateIter flag is true. // This will be implemented in a follow-up OMPLastprivateClause patch, but // result should be still correct without it, as we do not make these // variables private yet. - } } void CodeGenFunction::EmitOMPInnerLoop( @@ -734,7 +731,6 @@ void CodeGenFunction::EmitOMPSimdDirective(const OMPSimdDirective &S) { CGF.incrementProfileCounter(&S); } // Walk clauses and process safelen/lastprivate. 
- bool SeparateIter = false; CGF.LoopStack.setParallel(); CGF.LoopStack.setVectorizerEnable(true); for (auto C : S.clauses()) { @@ -754,7 +750,6 @@ void CodeGenFunction::EmitOMPSimdDirective(const OMPSimdDirective &S) { EmitOMPAlignedClause(CGF, CGF.CGM, cast(*C)); break; case OMPC_lastprivate: - SeparateIter = true; break; default: // Not handled yet @@ -805,15 +800,12 @@ void CodeGenFunction::EmitOMPSimdDirective(const OMPSimdDirective &S) { CGF.EmitOMPPrivateClause(S, LoopScope); (void)LoopScope.Privatize(); CGF.EmitOMPInnerLoop(S, LoopScope.requiresCleanups(), - S.getCond(SeparateIter), S.getInc(), + S.getCond(), S.getInc(), [&S](CodeGenFunction &CGF) { CGF.EmitOMPLoopBody(S); CGF.EmitStopPoint(&S); }, [](CodeGenFunction &) {}); - if (SeparateIter) { - CGF.EmitOMPLoopBody(S, /*SeparateIter=*/true); - } } CGF.EmitOMPSimdFinal(S); // Emit: if (PreCond) - end. @@ -914,7 +906,7 @@ void CodeGenFunction::EmitOMPForOuterLoop(OpenMPScheduleClauseKind ScheduleKind, // IV = LB EmitIgnoredExpr(S.getInit()); // IV < UB - BoolCondVal = EvaluateExprAsBool(S.getCond(false)); + BoolCondVal = EvaluateExprAsBool(S.getCond()); } else { BoolCondVal = RT.emitForNext(*this, S.getLocStart(), IVSize, IVSigned, IL, LB, UB, ST); @@ -950,7 +942,7 @@ void CodeGenFunction::EmitOMPForOuterLoop(OpenMPScheduleClauseKind ScheduleKind, ScheduleKind == OMPC_SCHEDULE_guided) && !Ordered); EmitOMPInnerLoop( - S, LoopScope.requiresCleanups(), S.getCond(/*SeparateIter=*/false), + S, LoopScope.requiresCleanups(), S.getCond(), S.getInc(), [&S](CodeGenFunction &CGF) { CGF.EmitOMPLoopBody(S); @@ -1108,8 +1100,8 @@ bool CodeGenFunction::EmitOMPWorksharingLoop(const OMPLoopDirective &S) { // IV = LB; EmitIgnoredExpr(S.getInit()); // while (idx <= UB) { BODY; ++idx; } - EmitOMPInnerLoop(S, LoopScope.requiresCleanups(), - S.getCond(/*SeparateIter=*/false), S.getInc(), + EmitOMPInnerLoop(S, LoopScope.requiresCleanups(), S.getCond(), + S.getInc(), [&S](CodeGenFunction &CGF) { CGF.EmitOMPLoopBody(S); 
CGF.EmitStopPoint(&S); diff --git a/lib/CodeGen/CodeGenFunction.h b/lib/CodeGen/CodeGenFunction.h index 469022dc62..b3c31f61d1 100644 --- a/lib/CodeGen/CodeGenFunction.h +++ b/lib/CodeGen/CodeGenFunction.h @@ -2189,8 +2189,7 @@ public: private: /// Helpers for the OpenMP loop directives. - void EmitOMPLoopBody(const OMPLoopDirective &Directive, - bool SeparateIter = false); + void EmitOMPLoopBody(const OMPLoopDirective &Directive); void EmitOMPSimdFinal(const OMPLoopDirective &S); /// \brief Emit code for the worksharing loop-based directive. /// \return true, if this construct has any lastprivate clause, false - diff --git a/lib/Sema/SemaOpenMP.cpp b/lib/Sema/SemaOpenMP.cpp index cfe8db3d65..03d78f0bce 100644 --- a/lib/Sema/SemaOpenMP.cpp +++ b/lib/Sema/SemaOpenMP.cpp @@ -2939,9 +2939,6 @@ CheckOpenMPLoop(OpenMPDirectiveKind DKind, Expr *NestedLoopCountExpr, ? SemaRef.BuildBinOp(CurScope, CondLoc, BO_LE, IV.get(), UB.get()) : SemaRef.BuildBinOp(CurScope, CondLoc, BO_LT, IV.get(), NumIterations.get()); - // Loop condition with 1 iteration separated (IV < LastIteration) - ExprResult SeparatedCond = SemaRef.BuildBinOp(CurScope, CondLoc, BO_LT, - IV.get(), LastIteration.get()); // Loop increment (IV = IV + 1) SourceLocation IncLoc; @@ -3071,7 +3068,6 @@ CheckOpenMPLoop(OpenMPDirectiveKind DKind, Expr *NestedLoopCountExpr, Built.CalcLastIteration = CalcLastIteration.get(); Built.PreCond = PreCond.get(); Built.Cond = Cond.get(); - Built.SeparatedCond = SeparatedCond.get(); Built.Init = Init.get(); Built.Inc = Inc.get(); Built.LB = LB.get(); diff --git a/lib/Serialization/ASTReaderStmt.cpp b/lib/Serialization/ASTReaderStmt.cpp index d84b5be585..e0cd1898af 100644 --- a/lib/Serialization/ASTReaderStmt.cpp +++ b/lib/Serialization/ASTReaderStmt.cpp @@ -2073,9 +2073,7 @@ void ASTStmtReader::VisitOMPLoopDirective(OMPLoopDirective *D) { D->setLastIteration(Reader.ReadSubExpr()); D->setCalcLastIteration(Reader.ReadSubExpr()); D->setPreCond(Reader.ReadSubExpr()); - auto Fst = 
Reader.ReadSubExpr(); - auto Snd = Reader.ReadSubExpr(); - D->setCond(Fst, Snd); + D->setCond(Reader.ReadSubExpr()); D->setInit(Reader.ReadSubExpr()); D->setInc(Reader.ReadSubExpr()); if (isOpenMPWorksharingDirective(D->getDirectiveKind())) { diff --git a/lib/Serialization/ASTWriterStmt.cpp b/lib/Serialization/ASTWriterStmt.cpp index 00356f8204..4c12d957df 100644 --- a/lib/Serialization/ASTWriterStmt.cpp +++ b/lib/Serialization/ASTWriterStmt.cpp @@ -1929,8 +1929,7 @@ void ASTStmtWriter::VisitOMPLoopDirective(OMPLoopDirective *D) { Writer.AddStmt(D->getLastIteration()); Writer.AddStmt(D->getCalcLastIteration()); Writer.AddStmt(D->getPreCond()); - Writer.AddStmt(D->getCond(/* SeparateIter */ false)); - Writer.AddStmt(D->getCond(/* SeparateIter */ true)); + Writer.AddStmt(D->getCond()); Writer.AddStmt(D->getInit()); Writer.AddStmt(D->getInc()); if (isOpenMPWorksharingDirective(D->getDirectiveKind())) { diff --git a/test/OpenMP/simd_codegen.cpp b/test/OpenMP/simd_codegen.cpp index 586aaa5732..6b3170d5df 100644 --- a/test/OpenMP/simd_codegen.cpp +++ b/test/OpenMP/simd_codegen.cpp @@ -190,7 +190,7 @@ void simple(float *a, float *b, float *c, float *d) { // CHECK: br label %[[SIMD_LOOP7_COND:[^,]+]] // CHECK: [[SIMD_LOOP7_COND]] // CHECK-NEXT: [[IV7:%.+]] = load i64, i64* [[OMP_IV7]]{{.*}}!llvm.mem.parallel_loop_access ![[SIMPLE_LOOP7_ID:[0-9]+]] -// CHECK-NEXT: [[CMP7:%.+]] = icmp slt i64 [[IV7]], 6 +// CHECK-NEXT: [[CMP7:%.+]] = icmp slt i64 [[IV7]], 7 // CHECK-NEXT: br i1 [[CMP7]], label %[[SIMPLE_LOOP7_BODY:.+]], label %[[SIMPLE_LOOP7_END:[^,]+]] for (long long i = -10; i < 10; i += 3) { // CHECK: [[SIMPLE_LOOP7_BODY]] @@ -205,16 +205,7 @@ void simple(float *a, float *b, float *c, float *d) { // CHECK-NEXT: store i64 [[ADD7_2]], i64* [[OMP_IV7]]{{.*}}!llvm.mem.parallel_loop_access ![[SIMPLE_LOOP7_ID]] } // CHECK: [[SIMPLE_LOOP7_END]] -// Separated last iteration. 
-// CHECK: [[IV7_4:%.+]] = load i64, i64* [[OMP_IV7]] -// CHECK-NEXT: [[LC_FIN_1:%.+]] = mul nsw i64 [[IV7_4]], 3 -// CHECK-NEXT: [[LC_FIN_2:%.+]] = add nsw i64 -10, [[LC_FIN_1]] -// CHECK-NEXT: store i64 [[LC_FIN_2]], i64* [[ADDR_I:%[^,]+]] -// CHECK: [[LOAD_I:%.+]] = load i64, i64* [[ADDR_I]] -// CHECK-NEXT: [[CONV_I:%.+]] = trunc i64 [[LOAD_I]] to i32 -// - -// CHECK: ret void +// CHECK-NEXT: ret void } template T tfoo(T a) { return a + K; } -- 2.40.0