]> granicus.if.org Git - clang/commitdiff
[OPENMP 4.1] Codegen for ‘simd’ clause in ‘ordered’ directive.
author: Alexey Bataev <a.bataev@hotmail.com>
Tue, 29 Sep 2015 03:48:57 +0000 (03:48 +0000)
committer: Alexey Bataev <a.bataev@hotmail.com>
Tue, 29 Sep 2015 03:48:57 +0000 (03:48 +0000)
Description.
If the simd clause is specified, the ordered regions encountered by any thread will use only a single SIMD lane to execute the ordered regions in the order of the loop iterations.
Restrictions.
An ordered construct with the simd clause is the only OpenMP construct that can appear in the simd region.

An ordered directive with the ‘simd’ clause is emitted as an outlined function plus a call to that function, which prevents this part of the code from being vectorized later in the backend.

git-svn-id: https://llvm.org/svn/llvm-project/cfe/trunk@248772 91177308-0d34-0410-b5e6-96231b3b80d8

lib/CodeGen/CGOpenMPRuntime.cpp
lib/CodeGen/CGOpenMPRuntime.h
lib/CodeGen/CGStmtOpenMP.cpp
test/OpenMP/ordered_codegen.cpp

index 7251bd3ef66677d6b537f9be45b21898d4e00c0a..3b265b041d140219f82ab9697a61318186e8e7fe 100644 (file)
@@ -1548,21 +1548,21 @@ void CGOpenMPRuntime::emitSingleRegion(CodeGenFunction &CGF,
 
 void CGOpenMPRuntime::emitOrderedRegion(CodeGenFunction &CGF,
                                         const RegionCodeGenTy &OrderedOpGen,
-                                        SourceLocation Loc) {
+                                        SourceLocation Loc, bool IsThreads) {
   // __kmpc_ordered(ident_t *, gtid);
   // OrderedOpGen();
   // __kmpc_end_ordered(ident_t *, gtid);
   // Prepare arguments and build a call to __kmpc_ordered
-  {
-    CodeGenFunction::RunCleanupsScope Scope(CGF);
+  CodeGenFunction::RunCleanupsScope Scope(CGF);
+  if (IsThreads) {
     llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
     CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_ordered), Args);
     // Build a call to __kmpc_end_ordered
     CGF.EHStack.pushCleanup<CallEndCleanup<std::extent<decltype(Args)>::value>>(
         NormalAndEHCleanup, createRuntimeFunction(OMPRTL__kmpc_end_ordered),
         llvm::makeArrayRef(Args));
-    emitInlinedDirective(CGF, OMPD_ordered, OrderedOpGen);
   }
+  emitInlinedDirective(CGF, OMPD_ordered, OrderedOpGen);
 }
 
 void CGOpenMPRuntime::emitBarrierCall(CodeGenFunction &CGF, SourceLocation Loc,
index 011006a3f81559facdc24d4b0ae33a58a5c047a1..c84ea008fc764805f04849d7b96d5df1c1d6694d 100644 (file)
@@ -449,7 +449,7 @@ public:
   /// ordered region.
   virtual void emitOrderedRegion(CodeGenFunction &CGF,
                                  const RegionCodeGenTy &OrderedOpGen,
-                                 SourceLocation Loc);
+                                 SourceLocation Loc, bool IsThreads);
 
   /// \brief Emit an implicit/explicit barrier for OpenMP threads.
   /// \param Kind Directive for which this implicit barrier call must be
index a06b0a2492539319ce53ac3e04d96894d3bc4e58..45fa610b314c9e25da3a16d9e59b7855cc1d9952 100644 (file)
@@ -1799,13 +1799,33 @@ void CodeGenFunction::EmitOMPFlushDirective(const OMPFlushDirective &S) {
   }(), S.getLocStart());
 }
 
+static llvm::Function *emitOutlinedOrderedFunction(CodeGenModule &CGM,
+                                                   const CapturedStmt *S) {
+  CodeGenFunction CGF(CGM, /*suppressNewContext=*/true);
+  CodeGenFunction::CGCapturedStmtInfo CapStmtInfo;
+  CGF.CapturedStmtInfo = &CapStmtInfo;
+  auto *Fn = CGF.GenerateOpenMPCapturedStmtFunction(*S);
+  Fn->addFnAttr(llvm::Attribute::NoInline);
+  return Fn;
+}
+
 void CodeGenFunction::EmitOMPOrderedDirective(const OMPOrderedDirective &S) {
   LexicalScope Scope(*this, S.getSourceRange());
-  auto &&CodeGen = [&S](CodeGenFunction &CGF) {
-    CGF.EmitStmt(cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
+  auto *C = S.getSingleClause<OMPSIMDClause>();
+  auto &&CodeGen = [&S, C, this](CodeGenFunction &CGF) {
+    if (C) {
+      auto CS = cast<CapturedStmt>(S.getAssociatedStmt());
+      llvm::SmallVector<llvm::Value *, 16> CapturedVars;
+      CGF.GenerateOpenMPCapturedVars(*CS, CapturedVars);
+      auto *OutlinedFn = emitOutlinedOrderedFunction(CGM, CS);
+      CGF.EmitNounwindRuntimeCall(OutlinedFn, CapturedVars);
+    } else {
+      CGF.EmitStmt(
+          cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
+    }
     CGF.EnsureInsertPoint();
   };
-  CGM.getOpenMPRuntime().emitOrderedRegion(*this, CodeGen, S.getLocStart());
+  CGM.getOpenMPRuntime().emitOrderedRegion(*this, CodeGen, S.getLocStart(), !C);
 }
 
 static llvm::Value *convertToScalarValue(CodeGenFunction &CGF, RValue Val,
index e9a7b1390fe04f44b43d877e32cdc2aae987a2de..e77c1bed97caa6181816b50f5a36ff08cab3f11c 100644 (file)
@@ -213,5 +213,22 @@ void runtime(float *a, float *b, float *c, float *d) {
 // CHECK: ret void
 }
 
+float f[10];
+// CHECK-LABEL: foo_simd
+void foo_simd(int low, int up) {
+  // CHECK: store float 0.000000e+00, float* %{{.+}}, align {{[0-9]+}}, !llvm.mem.parallel_loop_access !
+  // CHECK-NEXT: call void [[CAP_FUNC:@.+]](i32* %{{.+}}) #{{[0-9]+}}, !llvm.mem.parallel_loop_access !
+#pragma omp simd
+  for (int i = low; i < up; ++i) {
+    f[i] = 0.0;
+#pragma omp ordered simd
+    f[i] = 1.0;
+  }
+}
+
+// CHECK: define internal void [[CAP_FUNC]](i32* dereferenceable({{[0-9]+}}) %{{.+}}) #
+// CHECK: store float 1.000000e+00, float* %{{.+}}, align
+// CHECK-NEXT: ret void
+
 #endif // HEADER