]> granicus.if.org Git - clang/commitdiff
[OPENMP]Add call to __kmpc_push_target_tripcount() function.
authorAlexey Bataev <a.bataev@hotmail.com>
Mon, 7 Jan 2019 21:30:43 +0000 (21:30 +0000)
committerAlexey Bataev <a.bataev@hotmail.com>
Mon, 7 Jan 2019 21:30:43 +0000 (21:30 +0000)
Each we create the target regions with the teams distribute inner
region, we can better estimate number of the teams required to execute
the target region. Function __kmpc_push_target_tripcount() is used for
purpose, which accepts device_id and the number of the iterations,
performed by the associated loop.

git-svn-id: https://llvm.org/svn/llvm-project/cfe/trunk@350571 91177308-0d34-0410-b5e6-96231b3b80d8

12 files changed:
lib/CodeGen/CGOpenMPRuntime.cpp
lib/CodeGen/CGOpenMPRuntime.h
lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp
lib/CodeGen/CGStmtOpenMP.cpp
test/OpenMP/target_teams_distribute_parallel_for_codegen.cpp
test/OpenMP/target_teams_distribute_parallel_for_if_codegen.cpp
test/OpenMP/target_teams_distribute_parallel_for_simd_codegen.cpp
test/OpenMP/target_teams_distribute_parallel_for_simd_if_codegen.cpp
test/OpenMP/teams_distribute_codegen.cpp
test/OpenMP/teams_distribute_parallel_for_codegen.cpp
test/OpenMP/teams_distribute_parallel_for_simd_codegen.cpp
test/OpenMP/teams_distribute_simd_codegen.cpp

index 81760938406b4cb727b621a37af7a1de6c608f26..20eb0b29f427dad63b90f9d4db51711dbe016a1a 100644 (file)
@@ -673,6 +673,9 @@ enum OpenMPRTLFunction {
   //
   // Offloading related calls
   //
+  // Call to void __kmpc_push_target_tripcount(int64_t device_id, kmp_uint64
+  // size);
+  OMPRTL__kmpc_push_target_tripcount,
   // Call to int32_t __tgt_target(int64_t device_id, void *host_ptr, int32_t
   // arg_num, void** args_base, void **args, size_t *arg_sizes, int64_t
   // *arg_types);
@@ -2163,6 +2166,15 @@ CGOpenMPRuntime::createRuntimeFunction(unsigned Function) {
         FnTy, /*Name=*/"__kmpc_task_reduction_get_th_data");
     break;
   }
+  case OMPRTL__kmpc_push_target_tripcount: {
+    // Build void __kmpc_push_target_tripcount(int64_t device_id, kmp_uint64
+    // size);
+    llvm::Type *TypeParams[] = {CGM.Int64Ty, CGM.Int64Ty};
+    llvm::FunctionType *FnTy =
+        llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
+    RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_push_target_tripcount");
+    break;
+  }
   case OMPRTL__tgt_target: {
     // Build int32_t __tgt_target(int64_t device_id, void *host_ptr, int32_t
     // arg_num, void** args_base, void **args, size_t *arg_sizes, int64_t
@@ -8053,6 +8065,183 @@ static void emitOffloadingArraysArgument(
   }
 }
 
+/// Checks if the expression is constant or does not have non-trivial function
+/// calls.
+static bool isTrivial(ASTContext &Ctx, const Expr * E) {
+  // We can skip constant expressions.
+  // We can skip expressions with trivial calls or simple expressions.
+  return (E->isEvaluatable(Ctx, Expr::SE_AllowUndefinedBehavior) ||
+          !E->hasNonTrivialCall(Ctx)) &&
+         !E->HasSideEffects(Ctx, /*IncludePossibleEffects=*/true);
+}
+
+/// Checks if the \p Body is the \a CompoundStmt and returns its child statement
+/// iff there is only one that is not evaluatable at the compile time.
+static const Stmt *getSingleCompoundChild(ASTContext &Ctx, const Stmt *Body) {
+  if (const auto *C = dyn_cast<CompoundStmt>(Body)) {
+    const Stmt *Child = nullptr;
+    for (const Stmt *S : C->body()) {
+      if (const auto *E = dyn_cast<Expr>(S)) {
+        if (isTrivial(Ctx, E))
+          continue;
+      }
+      // Some of the statements can be ignored.
+      if (isa<AsmStmt>(S) || isa<NullStmt>(S) || isa<OMPFlushDirective>(S) ||
+          isa<OMPBarrierDirective>(S) || isa<OMPTaskyieldDirective>(S))
+        continue;
+      // Analyze declarations.
+      if (const auto *DS = dyn_cast<DeclStmt>(S)) {
+        if (llvm::all_of(DS->decls(), [&Ctx](const Decl *D) {
+              if (isa<EmptyDecl>(D) || isa<DeclContext>(D) ||
+                  isa<TypeDecl>(D) || isa<PragmaCommentDecl>(D) ||
+                  isa<PragmaDetectMismatchDecl>(D) || isa<UsingDecl>(D) ||
+                  isa<UsingDirectiveDecl>(D) ||
+                  isa<OMPDeclareReductionDecl>(D) ||
+                  isa<OMPThreadPrivateDecl>(D))
+                return true;
+              const auto *VD = dyn_cast<VarDecl>(D);
+              if (!VD)
+                return false;
+              return VD->isConstexpr() ||
+                     ((VD->getType().isTrivialType(Ctx) ||
+                       VD->getType()->isReferenceType()) &&
+                      (!VD->hasInit() || isTrivial(Ctx, VD->getInit())));
+            }))
+          continue;
+      }
+      // Found multiple children - cannot get the one child only.
+      if (Child)
+        return Body;
+      Child = S;
+    }
+    if (Child)
+      return Child;
+  }
+  return Body;
+}
+
+/// Check for inner distribute directive.
+static const OMPExecutableDirective *
+getNestedDistributeDirective(ASTContext &Ctx, const OMPExecutableDirective &D) {
+  const auto *CS = D.getInnermostCapturedStmt();
+  const auto *Body =
+      CS->getCapturedStmt()->IgnoreContainers(/*IgnoreCaptured=*/true);
+  const Stmt *ChildStmt = getSingleCompoundChild(Ctx, Body);
+
+  if (const auto *NestedDir = dyn_cast<OMPExecutableDirective>(ChildStmt)) {
+    OpenMPDirectiveKind DKind = NestedDir->getDirectiveKind();
+    switch (D.getDirectiveKind()) {
+    case OMPD_target:
+      if (isOpenMPDistributeDirective(DKind))
+        return NestedDir;
+      if (DKind == OMPD_teams) {
+        Body = NestedDir->getInnermostCapturedStmt()->IgnoreContainers(
+            /*IgnoreCaptured=*/true);
+        if (!Body)
+          return nullptr;
+        ChildStmt = getSingleCompoundChild(Ctx, Body);
+        if (const auto *NND = dyn_cast<OMPExecutableDirective>(ChildStmt)) {
+          DKind = NND->getDirectiveKind();
+          if (isOpenMPDistributeDirective(DKind))
+            return NND;
+        }
+      }
+      return nullptr;
+    case OMPD_target_teams:
+      if (isOpenMPDistributeDirective(DKind))
+        return NestedDir;
+      return nullptr;
+    case OMPD_target_parallel:
+    case OMPD_target_simd:
+    case OMPD_target_parallel_for:
+    case OMPD_target_parallel_for_simd:
+      return nullptr;
+    case OMPD_target_teams_distribute:
+    case OMPD_target_teams_distribute_simd:
+    case OMPD_target_teams_distribute_parallel_for:
+    case OMPD_target_teams_distribute_parallel_for_simd:
+    case OMPD_parallel:
+    case OMPD_for:
+    case OMPD_parallel_for:
+    case OMPD_parallel_sections:
+    case OMPD_for_simd:
+    case OMPD_parallel_for_simd:
+    case OMPD_cancel:
+    case OMPD_cancellation_point:
+    case OMPD_ordered:
+    case OMPD_threadprivate:
+    case OMPD_task:
+    case OMPD_simd:
+    case OMPD_sections:
+    case OMPD_section:
+    case OMPD_single:
+    case OMPD_master:
+    case OMPD_critical:
+    case OMPD_taskyield:
+    case OMPD_barrier:
+    case OMPD_taskwait:
+    case OMPD_taskgroup:
+    case OMPD_atomic:
+    case OMPD_flush:
+    case OMPD_teams:
+    case OMPD_target_data:
+    case OMPD_target_exit_data:
+    case OMPD_target_enter_data:
+    case OMPD_distribute:
+    case OMPD_distribute_simd:
+    case OMPD_distribute_parallel_for:
+    case OMPD_distribute_parallel_for_simd:
+    case OMPD_teams_distribute:
+    case OMPD_teams_distribute_simd:
+    case OMPD_teams_distribute_parallel_for:
+    case OMPD_teams_distribute_parallel_for_simd:
+    case OMPD_target_update:
+    case OMPD_declare_simd:
+    case OMPD_declare_target:
+    case OMPD_end_declare_target:
+    case OMPD_declare_reduction:
+    case OMPD_taskloop:
+    case OMPD_taskloop_simd:
+    case OMPD_requires:
+    case OMPD_unknown:
+      llvm_unreachable("Unexpected directive.");
+    }
+  }
+
+  return nullptr;
+}
+
+void CGOpenMPRuntime::emitTargetNumIterationsCall(
+    CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *Device,
+    const llvm::function_ref<llvm::Value *(
+        CodeGenFunction &CGF, const OMPLoopDirective &D)> &SizeEmitter) {
+  OpenMPDirectiveKind Kind = D.getDirectiveKind();
+  const OMPExecutableDirective *TD = &D;
+  // Get nested teams distribute kind directive, if any.
+  if (!isOpenMPDistributeDirective(Kind) || !isOpenMPTeamsDirective(Kind))
+    TD = getNestedDistributeDirective(CGM.getContext(), D);
+  if (!TD)
+    return;
+  const auto *LD = cast<OMPLoopDirective>(TD);
+  auto &&CodeGen = [LD, &Device, &SizeEmitter, this](CodeGenFunction &CGF,
+                                                     PrePostActionTy &) {
+    llvm::Value *NumIterations = SizeEmitter(CGF, *LD);
+
+    // Emit device ID if any.
+    llvm::Value *DeviceID;
+    if (Device)
+      DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
+                                           CGF.Int64Ty, /*isSigned=*/true);
+    else
+      DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
+
+    llvm::Value *Args[] = {DeviceID, NumIterations};
+    CGF.EmitRuntimeCall(
+        createRuntimeFunction(OMPRTL__kmpc_push_target_tripcount), Args);
+  };
+  emitInlinedDirective(CGF, OMPD_unknown, CodeGen);
+}
+
 void CGOpenMPRuntime::emitTargetCall(CodeGenFunction &CGF,
                                      const OMPExecutableDirective &D,
                                      llvm::Value *OutlinedFn,
index d9ac5df36b73c1c80a15c154f7aeade47dcca275..933c8af116d3b3ddfda00bdd11c2bd3cfb76de77 100644 (file)
@@ -1367,6 +1367,15 @@ public:
                                           bool IsOffloadEntry,
                                           const RegionCodeGenTy &CodeGen);
 
+  /// Emit code that pushes the trip count of loops associated with constructs
+  /// 'target teams distribute' and 'teams distribute parallel for'.
+  /// \param SizeEmitter Emits the int64 value for the number of iterations of
+  /// the associated loop.
+  virtual void emitTargetNumIterationsCall(
+      CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *Device,
+      const llvm::function_ref<llvm::Value *(
+          CodeGenFunction &CGF, const OMPLoopDirective &D)> &SizeEmitter);
+
   /// Emit the target offloading code associated with \a D. The emitted
   /// code attempts offloading the execution to the device, an the event of
   /// a failure it executes the host version outlined in \a OutlinedFn.
index 403aefbef9ae8bc4fff9d9b7c052257857762145..7046ab3aa35ca4636fe4e9f4962c24e8513f2199 100644 (file)
@@ -705,8 +705,8 @@ getDataSharingMode(CodeGenModule &CGM) {
                                           : CGOpenMPRuntimeNVPTX::Generic;
 }
 
-// Checks if the expression is constant or does not have non-trivial function
-// calls.
+/// Checks if the expression is constant or does not have non-trivial function
+/// calls.
 static bool isTrivial(ASTContext &Ctx, const Expr * E) {
   // We can skip constant expressions.
   // We can skip expressions with trivial calls or simple expressions.
index ee385930843b383c23ae5547cf2d50e8be095d7b..650f4e8b91aad355c9a03360acb25e6de8598fe0 100644 (file)
@@ -4071,6 +4071,16 @@ static void emitCommonOMPTargetDirective(CodeGenFunction &CGF,
   CGM.getOpenMPRuntime().emitTargetOutlinedFunction(S, ParentName, Fn, FnID,
                                                     IsOffloadEntry, CodeGen);
   OMPLexicalScope Scope(CGF, S, OMPD_task);
+  auto &&SizeEmitter = [](CodeGenFunction &CGF, const OMPLoopDirective &D) {
+    OMPLoopScope(CGF, D);
+    // Emit calculation of the iterations count.
+    llvm::Value *NumIterations = CGF.EmitScalarExpr(D.getNumIterations());
+    NumIterations = CGF.Builder.CreateIntCast(NumIterations, CGF.Int64Ty,
+                                              /*IsSigned=*/false);
+    return NumIterations;
+  };
+  CGM.getOpenMPRuntime().emitTargetNumIterationsCall(CGF, S, Device,
+                                                     SizeEmitter);
   CGM.getOpenMPRuntime().emitTargetCall(CGF, S, Fn, FnID, IfCond, Device);
 }
 
index 4d263cf69f02a0c129a7eb0a50b8b26b36cab450..e439431a0518259b45b46b943b9f0da7d81a8a3b 100644 (file)
@@ -51,9 +51,13 @@ int target_teams_fun(int *g){
   // discard capture expressions for te and th
   // HCK1: = alloca i32,
   // HCK1: = alloca i32,
+  // HCK1: = alloca i32,
+  // HCK1: = alloca i32,
+  // HCK1: = alloca i32,
   // HCK1: [[N_CAST:%.+]] = alloca i{{32|64}},
   // HCK1: [[TE_CAST:%.+]] = alloca i{{32|64}},
   // HCK1: [[TH_CAST:%.+]] = alloca i{{32|64}},
+  // HCK1: call void @__kmpc_push_target_tripcount(i64 -1, i64 %{{.+}})
   // HCK1: [[N_PAR:%.+]] = load{{.+}}, {{.+}} [[N_CAST]],
   // HCK1: [[TE_PAR:%.+]] = load{{.+}}, {{.+}} [[TE_CAST]],
   // HCK1: [[TH_PAR:%.+]] = load{{.+}}, {{.+}} [[TH_CAST]],
index 8cd90a610b5cbb25269e91a911e4b4363cec733a..af6ed9744a2cc3bca41c499fd4f7e7739285fb3e 100644 (file)
@@ -22,8 +22,10 @@ int Arg;
 
 // CHECK-LABEL: define {{.*}}void @{{.+}}gtid_test
 void gtid_test() {
+// CHECK: call void @__kmpc_push_target_tripcount(i64 -1, i64 100)
 // CHECK: call i{{[0-9]+}} @__tgt_target_teams(
 // CHECK: call void [[OFFLOADING_FUN_0:@.+]](
+// CHECK: call void @__kmpc_push_target_tripcount(i64 -1, i64 100)
 // CHECK: call i{{[0-9]+}} @__tgt_target_teams(
 // CHECK: call void [[OFFLOADING_FUN_1:@.+]](
 #pragma omp target teams distribute parallel for
@@ -78,9 +80,12 @@ int tmain(T Arg) {
 
 // CHECK-LABEL: define {{.*}}i{{[0-9]+}} @main()
 int main() {
+// CHECK: call void @__kmpc_push_target_tripcount(i64 -1, i64 100)
 // CHECK: call i{{[0-9]+}} @__tgt_target_teams(
 // CHECK: call void [[OFFLOADING_FUN_0:@.+]](
+// CHECK: call void @__kmpc_push_target_tripcount(i64 -1, i64 100)
 // CHECK: call void [[OFFLOADING_FUN_1:@.+]](
+// CHECK: call void @__kmpc_push_target_tripcount(i64 -1, i64 100)
 // CHECK: call i{{[0-9]+}} @__tgt_target_teams(
 // CHECK: call void [[OFFLOADING_FUN_2:@.+]](
 // CHECK: = call {{.*}}i{{.+}} @{{.+}}tmain
index 0329d7e2789ecd88910a1953abbc7a14aca8f313..7a1ea810911ee472107da1c922f4e447ca3be17f 100644 (file)
@@ -52,10 +52,14 @@ int target_teams_fun(int *g){
 // discard capture expressions for te and th
 // HCK1: = alloca i32,
 // HCK1: = alloca i32,
+// HCK1: = alloca i32,
+// HCK1: = alloca i32,
+// HCK1: = alloca i32,
 // HCK1: [[I_CAST:%.+]] = alloca i{{32|64}},
 // HCK1: [[N_CAST:%.+]] = alloca i{{32|64}},
 // HCK1: [[TE_CAST:%.+]] = alloca i{{32|64}},
 // HCK1: [[TH_CAST:%.+]] = alloca i{{32|64}},
+// HCK1: call void @__kmpc_push_target_tripcount(i64 -1, i64 %{{.+}})
 // HCK1: [[I_PAR:%.+]] = load{{.+}}, {{.+}} [[I_CAST]],
 // HCK1: [[N_PAR:%.+]] = load{{.+}}, {{.+}} [[N_CAST]],
 // HCK1: [[TE_PAR:%.+]] = load{{.+}}, {{.+}} [[TE_CAST]],
@@ -73,7 +77,8 @@ int target_teams_fun(int *g){
   // HCK1: call void @[[OFFL2:.+]](i{{64|32}} %{{.+}})
   {{{
   #pragma omp target teams distribute parallel for simd is_device_ptr(g) simdlen(8)
-  for(int i = 0; i < n; i++) {
+  for(
+    int i = 0; i < n; i++) {
     a[i] = g[0];
   }
   }}}
index a3a14fbf1e6855127ad4d888c5b8338fe19e9957..b6778417be672c23dcf2d62d7e66ac0deb539edb 100644 (file)
@@ -22,8 +22,10 @@ int Arg;
 
 // CHECK-LABEL: define {{.*}}void @{{.+}}gtid_test
 void gtid_test() {
+// CHECK: call void @__kmpc_push_target_tripcount(i64 -1, i64 100)
 // CHECK: call i{{[0-9]+}} @__tgt_target_teams(
 // CHECK: call void [[OFFLOADING_FUN_0:@.+]](
+// CHECK: call void @__kmpc_push_target_tripcount(i64 -1, i64 100)
 // CHECK: call i{{[0-9]+}} @__tgt_target_teams(
 // CHECK: call void [[OFFLOADING_FUN_1:@.+]](
 #pragma omp target teams distribute parallel for simd
@@ -78,9 +80,12 @@ int tmain(T Arg) {
 
 // CHECK-LABEL: define {{.*}}i{{[0-9]+}} @main()
 int main() {
+// CHECK: call void @__kmpc_push_target_tripcount(i64 -1, i64 100)
 // CHECK: call i{{[0-9]+}} @__tgt_target_teams(
 // CHECK: call void [[OFFLOADING_FUN_0:@.+]](
+// CHECK: call void @__kmpc_push_target_tripcount(i64 -1, i64 100)
 // CHECK: call void [[OFFLOADING_FUN_1:@.+]](
+// CHECK: call void @__kmpc_push_target_tripcount(i64 -1, i64 100)
 // CHECK: call i{{[0-9]+}} @__tgt_target_teams(
 // CHECK: call void [[OFFLOADING_FUN_2:@.+]](
 // CHECK: = call {{.*}}i{{.+}} @{{.+}}tmain
index 0f6b5f2f25e118e05e870f48aa16ecbfe7a32aab..ea299bc346a66fac5f5f926fbb3fa6fe9c637a18 100644 (file)
 int a[100];
 
 // CK1: define {{.*}}i32 @{{.+}}teams_argument_globali(
-int teams_argument_global(int n){  
+int teams_argument_global(int n){
   int te = n / 128;
   int th = 128;
   // discard n_addr
   // CK1: alloca i32,
   // CK1: [[TE:%.+]] = alloca i32,
   // CK1: [[TH:%.+]] = alloca i32,
+  // CK1: alloca i32,
+  // CK1: alloca i32,
+  // CK1: alloca i32,
   // CK1: [[TE_CAST:%.+]] = alloca i{{32|64}},
   // CK1: [[TH_CAST:%.+]] = alloca i{{32|64}},
+  // CK1: call void @__kmpc_push_target_tripcount(i64 -1, i64 %{{.+}})
   // CK1: [[TE_PAR:%.+]] = load{{.+}}, {{.+}} [[TE_CAST]],
   // CK1: [[TH_PAR:%.+]] = load{{.+}}, {{.+}} [[TH_CAST]],
 
index 865ea129f9790eb8d1e1cba0d48aceb451514dce..fa425b3ecab0ce450a7320f0a57b8880a608f1ea 100644 (file)
@@ -28,8 +28,12 @@ int teams_argument_global(int n){
   // CK1: alloca i32,
   // CK1: [[TE:%.+]] = alloca i32,
   // CK1: [[TH:%.+]] = alloca i32,
+  // CK1: alloca i32,
+  // CK1: alloca i32,
+  // CK1: alloca i32,
   // CK1: [[TE_CAST:%.+]] = alloca i{{32|64}},
   // CK1: [[TH_CAST:%.+]] = alloca i{{32|64}},
+  // CK1: call void @__kmpc_push_target_tripcount(i64 -1, i64 %{{.+}})
   // CK1: [[TE_PAR:%.+]] = load{{.+}}, {{.+}} [[TE_CAST]],
   // CK1: [[TH_PAR:%.+]] = load{{.+}}, {{.+}} [[TH_CAST]],
   // CK1: call i32 @__tgt_target_teams(i64 -1, i8* @{{[^,]+}}, i32 4, i8** %{{[^,]+}}, i8** %{{[^,]+}}, i{{64|32}}* {{.+}}@{{[^,]+}}, i32 0, i32 0), i64* {{.+}}@{{[^,]+}}, i32 0, i32 0), i32 {{.+}}, i32 {{.+}})
index 784deeef7cd2fa73ca865318d792e8007eb5b224..45793419d7c4b524f000ca846984e302bf15d94c 100644 (file)
@@ -28,8 +28,12 @@ int teams_argument_global(int n){
   // CK1: alloca i32,
   // CK1: [[TE:%.+]] = alloca i32,
   // CK1: [[TH:%.+]] = alloca i32,
+  // CK1: alloca i32,
+  // CK1: alloca i32,
+  // CK1: alloca i32,
   // CK1: [[TE_CAST:%.+]] = alloca i{{32|64}},
   // CK1: [[TH_CAST:%.+]] = alloca i{{32|64}},
+  // CK1: call void @__kmpc_push_target_tripcount(i64 -1, i64 %{{.+}})
   // CK1: [[TE_PAR:%.+]] = load{{.+}}, {{.+}} [[TE_CAST]],
   // CK1: [[TH_PAR:%.+]] = load{{.+}}, {{.+}} [[TH_CAST]],
 
index 813d3912c3985ec66b7a5b25e5968c9ee325f2da..c89a936b64f600b47a8f7c5245b679274904c897 100644 (file)
@@ -30,8 +30,12 @@ int teams_argument_global(int n) {
   // CK1: alloca i32,
   // CK1: [[TE:%.+]] = alloca i32,
   // CK1: [[TH:%.+]] = alloca i32,
+  // CK1: alloca i32,
+  // CK1: alloca i32,
+  // CK1: alloca i32,
   // CK1: [[TE_CAST:%.+]] = alloca i{{32|64}},
   // CK1: [[TH_CAST:%.+]] = alloca i{{32|64}},
+  // CK1: call void @__kmpc_push_target_tripcount(i64 -1, i64 %{{.+}})
   // CK1: [[TE_PAR:%.+]] = load{{.+}}, {{.+}} [[TE_CAST]],
   // CK1: [[TH_PAR:%.+]] = load{{.+}}, {{.+}} [[TH_CAST]],