[OpenMP] Make default distribute schedule for NVPTX target regions in SPMD mode achieve coalescing

author Gheorghe-Teodor Bercea <gheorghe-teod.bercea@ibm.com>

Thu, 27 Sep 2018 19:22:56 +0000 (19:22 +0000)

committer Gheorghe-Teodor Bercea <gheorghe-teod.bercea@ibm.com>

Thu, 27 Sep 2018 19:22:56 +0000 (19:22 +0000)
author Gheorghe-Teodor Bercea <gheorghe-teod.bercea@ibm.com>
Thu, 27 Sep 2018 19:22:56 +0000 (19:22 +0000)
committer Gheorghe-Teodor Bercea <gheorghe-teod.bercea@ibm.com>
Thu, 27 Sep 2018 19:22:56 +0000 (19:22 +0000)
diff --git a/lib/CodeGen/CGOpenMPRuntime.h b/lib/CodeGen/CGOpenMPRuntime.h

index e0685d9bc66091db588aaab25099def1b9ca5808..982aeb3cf7ae1dad36fd1d5ee33b25496bbc3cbb 100644 (file)
--- a/lib/CodeGen/CGOpenMPRuntime.h
+++ b/lib/CodeGen/CGOpenMPRuntime.h
@@ -1490,6 +1490,12 @@ public:
                                        const VarDecl *NativeParam,
                                        const VarDecl *TargetParam) const;
  
+  /// Choose default schedule type and chunk value for the dist_schedule
+  /// clause; this base implementation is a no-op and leaves both unchanged.
+  virtual void getDefaultDistScheduleAndChunk(CodeGenFunction &CGF,
+      const OMPLoopDirective &S, OpenMPDistScheduleClauseKind &ScheduleKind,
+      llvm::Value *&Chunk) const {}
+
    /// Emits call of the outlined function with the provided arguments,
    /// translating these arguments to correct target-specific arguments.
    virtual void
diff --git a/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp b/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp

index 45aafaa5c3cd36be39860db233f27dffdcefeacb..56b244d0ae8802c090fb0c7999d26f8f3ba5faaf 100644 (file)
--- a/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp
+++ b/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp
@@ -4081,3 +4081,15 @@ void CGOpenMPRuntimeNVPTX::functionFinished(CodeGenFunction &CGF) {
    FunctionGlobalizedDecls.erase(CGF.CurFn);
    CGOpenMPRuntime::functionFinished(CGF);
  }
+
+/// Choose the default dist_schedule kind and chunk for NVPTX.
+///
+/// When the current execution mode is EM_SPMD, \p ScheduleKind is set to
+/// OMPC_DIST_SCHEDULE_static and \p Chunk is set to the value returned by
+/// getNVPTXNumThreads(), treated as a 32-bit unsigned integer and converted
+/// to the type of the iteration variable of \p S. In any other execution
+/// mode both output parameters are left untouched.
+void CGOpenMPRuntimeNVPTX::getDefaultDistScheduleAndChunk(
+    CodeGenFunction &CGF, const OMPLoopDirective &S,
+    OpenMPDistScheduleClauseKind &ScheduleKind,
+    llvm::Value *&Chunk) const {
+  if (getExecutionMode() == CGOpenMPRuntimeNVPTX::EM_SPMD) {
+    ScheduleKind = OMPC_DIST_SCHEDULE_static;
+    Chunk = CGF.EmitScalarConversion(getNVPTXNumThreads(CGF),
+        CGF.getContext().getIntTypeForBitwidth(32, /*Signed=*/0),
+        S.getIterationVariable()->getType(), S.getBeginLoc());
+  }
+}
diff --git a/lib/CodeGen/CGOpenMPRuntimeNVPTX.h b/lib/CodeGen/CGOpenMPRuntimeNVPTX.h

index 85ed838d473d4af3744fd21fff3847670448c904..76343dfc7f5a9e28bb21fe5d1a8110d591a9c490 100644 (file)
--- a/lib/CodeGen/CGOpenMPRuntimeNVPTX.h
+++ b/lib/CodeGen/CGOpenMPRuntimeNVPTX.h
@@ -340,6 +340,11 @@ public:
    ///
    void functionFinished(CodeGenFunction &CGF) override;
  
+  /// Choose default schedule type and chunk value for the dist_schedule clause.
+  void getDefaultDistScheduleAndChunk(CodeGenFunction &CGF,
+      const OMPLoopDirective &S, OpenMPDistScheduleClauseKind &ScheduleKind,
+      llvm::Value *&Chunk) const override;
+
  private:
    /// Track the execution mode when codegening directives within a target
    /// region. The appropriate mode (SPMD/NON-SPMD) is set on entry to the
diff --git a/lib/CodeGen/CGStmtOpenMP.cpp b/lib/CodeGen/CGStmtOpenMP.cpp

index 7305b0f3213ec4c3cca16768b8866f12238f59ce..4bafb8ba9fc9c51b6afe7db666923b1e5473832b 100644 (file)
--- a/lib/CodeGen/CGStmtOpenMP.cpp
+++ b/lib/CodeGen/CGStmtOpenMP.cpp
@@ -3325,6 +3325,10 @@ void CodeGenFunction::EmitOMPDistributeLoop(const OMPLoopDirective &S,
                                         S.getIterationVariable()->getType(),
                                         S.getBeginLoc());
          }
+      } else {
+        // Default behaviour for dist_schedule clause.
+        CGM.getOpenMPRuntime().getDefaultDistScheduleAndChunk(
+            *this, S, ScheduleKind, Chunk);
        }
        const unsigned IVSize = getContext().getTypeSize(IVExpr->getType());
        const bool IVSigned = IVExpr->getType()->hasSignedIntegerRepresentation();
diff --git a/test/OpenMP/nvptx_target_teams_distribute_parallel_for_codegen.cpp b/test/OpenMP/nvptx_target_teams_distribute_parallel_for_codegen.cpp

index 2fdcbe683d5335bbde914778545324c42de9180a..19993351198f83169fde2c1a97688114a9ecf4a8 100644 (file)
--- a/test/OpenMP/nvptx_target_teams_distribute_parallel_for_codegen.cpp
+++ b/test/OpenMP/nvptx_target_teams_distribute_parallel_for_codegen.cpp
@@ -35,7 +35,7 @@ tx ftemplate(int n) {
      l = i;
    }
  
-  #pragma omp target teams distribute parallel for map(tofrom: aa) num_teams(M) thread_limit(64)
+#pragma omp target teams distribute parallel for map(tofrom: aa) num_teams(M) thread_limit(64)
    for(int i = 0; i < n; i++) {
      aa[i] += 1;
    }
@@ -87,7 +87,7 @@ int bar(int n){
  // CHECK-LABEL: define {{.*}}void {{@__omp_offloading_.+}}(
  // CHECK-DAG: [[THREAD_LIMIT:%.+]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x()
  // CHECK: call void @__kmpc_spmd_kernel_init(i32 [[THREAD_LIMIT]], i16 0, i16 0)
-// CHECK: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, {{.+}} 92,
+// CHECK: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, {{.+}} 91,
  // CHECK: {{call|invoke}} void [[OUTL2:@.+]](
  // CHECK: call void @__kmpc_for_static_fini(
  // CHECK: call void @__kmpc_spmd_kernel_deinit()
@@ -101,7 +101,7 @@ int bar(int n){
  // CHECK-LABEL: define {{.*}}void {{@__omp_offloading_.+}}(
  // CHECK-DAG: [[THREAD_LIMIT:%.+]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x()
  // CHECK: call void @__kmpc_spmd_kernel_init(i32 [[THREAD_LIMIT]], i16 0, i16 0)
-// CHECK: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, {{.+}} 92,
+// CHECK: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, {{.+}} 91,
  // CHECK: {{call|invoke}} void [[OUTL3:@.+]](
  // CHECK: call void @__kmpc_for_static_fini(
  // CHECK: call void @__kmpc_spmd_kernel_deinit()
@@ -117,7 +117,7 @@ int bar(int n){
  // CHECK-DAG: [[THREAD_LIMIT:%.+]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x()
  // CHECK: call void @__kmpc_spmd_kernel_init(i32 [[THREAD_LIMIT]], i16 0, i16 0)
  // CHECK: store {{.+}} 99, {{.+}}* [[COMB_UB:%.+]], align
-// CHECK: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, {{.+}} 92, {{.+}}, {{.+}}, {{.+}}* [[COMB_UB]],
+// CHECK: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, {{.+}} 91, {{.+}}, {{.+}}, {{.+}}* [[COMB_UB]],
  // CHECK: {{call|invoke}} void [[OUTL4:@.+]](
  // CHECK: call void @__kmpc_for_static_fini(
  // CHECK: call void @__kmpc_spmd_kernel_deinit()
diff --git a/test/OpenMP/nvptx_target_teams_distribute_parallel_for_simd_codegen.cpp b/test/OpenMP/nvptx_target_teams_distribute_parallel_for_simd_codegen.cpp

index 0f0f12c554b19559e0497569958126cc04f610c3..92436c40f46832e04f695a1ca1119dba602442b4 100644 (file)
--- a/test/OpenMP/nvptx_target_teams_distribute_parallel_for_simd_codegen.cpp
+++ b/test/OpenMP/nvptx_target_teams_distribute_parallel_for_simd_codegen.cpp
@@ -33,7 +33,7 @@ tx ftemplate(int n) {
      l = i;
    }
  
-  #pragma omp target teams distribute parallel for simd map(tofrom: aa) num_teams(M) thread_limit(64)
+ #pragma omp target teams distribute parallel for simd map(tofrom: aa) num_teams(M) thread_limit(64)
    for(int i = 0; i < n; i++) {
      aa[i] += 1;
    }
@@ -82,7 +82,7 @@ int bar(int n){
  // CHECK-LABEL: define {{.*}}void {{@__omp_offloading_.+}}(
  // CHECK-DAG: [[THREAD_LIMIT:%.+]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x()
  // CHECK: call void @__kmpc_spmd_kernel_init(i32 [[THREAD_LIMIT]], i16 0, i16 0)
-// CHECK: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, {{.+}} 92,
+// CHECK: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, {{.+}} 91,
  // CHECK: {{call|invoke}} void [[OUTL2:@.+]](
  // CHECK: call void @__kmpc_for_static_fini(
  // CHECK: call void @__kmpc_spmd_kernel_deinit()
@@ -96,7 +96,7 @@ int bar(int n){
  // CHECK-LABEL: define {{.*}}void {{@__omp_offloading_.+}}(
  // CHECK-DAG: [[THREAD_LIMIT:%.+]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x()
  // CHECK: call void @__kmpc_spmd_kernel_init(i32 [[THREAD_LIMIT]], i16 0, i16 0)
-// CHECK: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, {{.+}} 92,
+// CHECK: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, {{.+}} 91,
  // CHECK: {{call|invoke}} void [[OUTL3:@.+]](
  // CHECK: call void @__kmpc_for_static_fini(
  // CHECK: call void @__kmpc_spmd_kernel_deinit()
@@ -112,7 +112,7 @@ int bar(int n){
  // CHECK-DAG: [[THREAD_LIMIT:%.+]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x()
  // CHECK: call void @__kmpc_spmd_kernel_init(i32 [[THREAD_LIMIT]], i16 0, i16 0)
  // CHECK: store {{.+}} 99, {{.+}}* [[COMB_UB:%.+]], align
-// CHECK: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, {{.+}} 92, {{.+}}, {{.+}}, {{.+}}* [[COMB_UB]],
+// CHECK: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, {{.+}} 91, {{.+}}, {{.+}}, {{.+}}* [[COMB_UB]],
  // CHECK: {{call|invoke}} void [[OUTL4:@.+]](
  // CHECK: call void @__kmpc_for_static_fini(
  // CHECK: call void @__kmpc_spmd_kernel_deinit()
author	Gheorghe-Teodor Bercea <gheorghe-teod.bercea@ibm.com>
	Thu, 27 Sep 2018 19:22:56 +0000 (19:22 +0000)
committer	Gheorghe-Teodor Bercea <gheorghe-teod.bercea@ibm.com>
	Thu, 27 Sep 2018 19:22:56 +0000 (19:22 +0000)
lib/CodeGen/CGOpenMPRuntime.h		patch \| blob \| history
lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp		patch \| blob \| history
lib/CodeGen/CGOpenMPRuntimeNVPTX.h		patch \| blob \| history
lib/CodeGen/CGStmtOpenMP.cpp		patch \| blob \| history
test/OpenMP/nvptx_target_teams_distribute_parallel_for_codegen.cpp		patch \| blob \| history
test/OpenMP/nvptx_target_teams_distribute_parallel_for_simd_codegen.cpp		patch \| blob \| history