From: Alexey Bataev Date: Tue, 29 Sep 2015 03:48:57 +0000 (+0000) Subject: [OPENMP 4.1] Codegen for ‘simd’ clause in ‘ordered’ directive. X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=af2b6e30d8dd2974be238623bcb4ca384435259a;p=clang [OPENMP 4.1] Codegen for ‘simd’ clause in ‘ordered’ directive. Description. If the simd clause is specified, the ordered regions encountered by any thread will use only a single SIMD lane to execute the ordered regions in the order of the loop iterations. Restrictions. An ordered construct with the simd clause is the only OpenMP construct that can appear in the simd region. An ordered directive with ‘simd’ clause is generated as an outlined function and corresponding function call to prevent this part of code from vectorization later in backend. git-svn-id: https://llvm.org/svn/llvm-project/cfe/trunk@248772 91177308-0d34-0410-b5e6-96231b3b80d8 --- diff --git a/lib/CodeGen/CGOpenMPRuntime.cpp b/lib/CodeGen/CGOpenMPRuntime.cpp index 7251bd3ef6..3b265b041d 100644 --- a/lib/CodeGen/CGOpenMPRuntime.cpp +++ b/lib/CodeGen/CGOpenMPRuntime.cpp @@ -1548,21 +1548,21 @@ void CGOpenMPRuntime::emitSingleRegion(CodeGenFunction &CGF, void CGOpenMPRuntime::emitOrderedRegion(CodeGenFunction &CGF, const RegionCodeGenTy &OrderedOpGen, - SourceLocation Loc) { + SourceLocation Loc, bool IsThreads) { // __kmpc_ordered(ident_t *, gtid); // OrderedOpGen(); // __kmpc_end_ordered(ident_t *, gtid); // Prepare arguments and build a call to __kmpc_ordered - { - CodeGenFunction::RunCleanupsScope Scope(CGF); + CodeGenFunction::RunCleanupsScope Scope(CGF); + if (IsThreads) { llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)}; CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_ordered), Args); // Build a call to __kmpc_end_ordered CGF.EHStack.pushCleanup<CallEndCleanup<std::extent<decltype(Args)>::value>>( NormalAndEHCleanup, createRuntimeFunction(OMPRTL__kmpc_end_ordered), llvm::makeArrayRef(Args)); - emitInlinedDirective(CGF, OMPD_ordered, OrderedOpGen); } + 
emitInlinedDirective(CGF, OMPD_ordered, OrderedOpGen); } void CGOpenMPRuntime::emitBarrierCall(CodeGenFunction &CGF, SourceLocation Loc, diff --git a/lib/CodeGen/CGOpenMPRuntime.h b/lib/CodeGen/CGOpenMPRuntime.h index 011006a3f8..c84ea008fc 100644 --- a/lib/CodeGen/CGOpenMPRuntime.h +++ b/lib/CodeGen/CGOpenMPRuntime.h @@ -449,7 +449,7 @@ public: /// ordered region. virtual void emitOrderedRegion(CodeGenFunction &CGF, const RegionCodeGenTy &OrderedOpGen, - SourceLocation Loc); + SourceLocation Loc, bool IsThreads); /// \brief Emit an implicit/explicit barrier for OpenMP threads. /// \param Kind Directive for which this implicit barrier call must be diff --git a/lib/CodeGen/CGStmtOpenMP.cpp b/lib/CodeGen/CGStmtOpenMP.cpp index a06b0a2492..45fa610b31 100644 --- a/lib/CodeGen/CGStmtOpenMP.cpp +++ b/lib/CodeGen/CGStmtOpenMP.cpp @@ -1799,13 +1799,33 @@ void CodeGenFunction::EmitOMPFlushDirective(const OMPFlushDirective &S) { }(), S.getLocStart()); } +static llvm::Function *emitOutlinedOrderedFunction(CodeGenModule &CGM, + const CapturedStmt *S) { + CodeGenFunction CGF(CGM, /*suppressNewContext=*/true); + CodeGenFunction::CGCapturedStmtInfo CapStmtInfo; + CGF.CapturedStmtInfo = &CapStmtInfo; + auto *Fn = CGF.GenerateOpenMPCapturedStmtFunction(*S); + Fn->addFnAttr(llvm::Attribute::NoInline); + return Fn; +} + void CodeGenFunction::EmitOMPOrderedDirective(const OMPOrderedDirective &S) { LexicalScope Scope(*this, S.getSourceRange()); - auto &&CodeGen = [&S](CodeGenFunction &CGF) { - CGF.EmitStmt(cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt()); + auto *C = S.getSingleClause<OMPSIMDClause>(); + auto &&CodeGen = [&S, C, this](CodeGenFunction &CGF) { + if (C) { + auto CS = cast<CapturedStmt>(S.getAssociatedStmt()); + llvm::SmallVector<llvm::Value *, 16> CapturedVars; + CGF.GenerateOpenMPCapturedVars(*CS, CapturedVars); + auto *OutlinedFn = emitOutlinedOrderedFunction(CGM, CS); + CGF.EmitNounwindRuntimeCall(OutlinedFn, CapturedVars); + } else { + CGF.EmitStmt( + cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt()); + } 
CGF.EnsureInsertPoint(); }; - CGM.getOpenMPRuntime().emitOrderedRegion(*this, CodeGen, S.getLocStart()); + CGM.getOpenMPRuntime().emitOrderedRegion(*this, CodeGen, S.getLocStart(), !C); } static llvm::Value *convertToScalarValue(CodeGenFunction &CGF, RValue Val, diff --git a/test/OpenMP/ordered_codegen.cpp b/test/OpenMP/ordered_codegen.cpp index e9a7b1390f..e77c1bed97 100644 --- a/test/OpenMP/ordered_codegen.cpp +++ b/test/OpenMP/ordered_codegen.cpp @@ -213,5 +213,22 @@ void runtime(float *a, float *b, float *c, float *d) { // CHECK: ret void } +float f[10]; +// CHECK-LABEL: foo_simd +void foo_simd(int low, int up) { + // CHECK: store float 0.000000e+00, float* %{{.+}}, align {{[0-9]+}}, !llvm.mem.parallel_loop_access ! + // CHECK-NEXT: call void [[CAP_FUNC:@.+]](i32* %{{.+}}) #{{[0-9]+}}, !llvm.mem.parallel_loop_access ! +#pragma omp simd + for (int i = low; i < up; ++i) { + f[i] = 0.0; +#pragma omp ordered simd + f[i] = 1.0; + } +} + +// CHECK: define internal void [[CAP_FUNC]](i32* dereferenceable({{[0-9]+}}) %{{.+}}) # +// CHECK: store float 1.000000e+00, float* %{{.+}}, align +// CHECK-NEXT: ret void + #endif // HEADER