]> granicus.if.org Git - clang/commitdiff
[OpenMP] Make default distribute schedule for NVPTX target regions in SPMD mode achie...
authorGheorghe-Teodor Bercea <gheorghe-teod.bercea@ibm.com>
Thu, 27 Sep 2018 19:22:56 +0000 (19:22 +0000)
committerGheorghe-Teodor Bercea <gheorghe-teod.bercea@ibm.com>
Thu, 27 Sep 2018 19:22:56 +0000 (19:22 +0000)
Summary: For the OpenMP NVPTX toolchain choose a default distribute schedule that ensures coalescing on the GPU when in SPMD mode. This significantly increases the performance of offloaded target code and reduces the number of registers used on the GPU side.

Reviewers: ABataev, caomhin, Hahnfeld

Reviewed By: ABataev, Hahnfeld

Subscribers: Hahnfeld, jholewinski, guansong, cfe-commits

Differential Revision: https://reviews.llvm.org/D52434

git-svn-id: https://llvm.org/svn/llvm-project/cfe/trunk@343253 91177308-0d34-0410-b5e6-96231b3b80d8

lib/CodeGen/CGOpenMPRuntime.h
lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp
lib/CodeGen/CGOpenMPRuntimeNVPTX.h
lib/CodeGen/CGStmtOpenMP.cpp
test/OpenMP/nvptx_target_teams_distribute_parallel_for_codegen.cpp
test/OpenMP/nvptx_target_teams_distribute_parallel_for_simd_codegen.cpp

index e0685d9bc66091db588aaab25099def1b9ca5808..982aeb3cf7ae1dad36fd1d5ee33b25496bbc3cbb 100644 (file)
@@ -1490,6 +1490,12 @@ public:
                                       const VarDecl *NativeParam,
                                       const VarDecl *TargetParam) const;
 
+  /// Choose default schedule type and chunk value for the
+  /// dist_schedule clause.
+  virtual void getDefaultDistScheduleAndChunk(CodeGenFunction &CGF,
+      const OMPLoopDirective &S, OpenMPDistScheduleClauseKind &ScheduleKind,
+      llvm::Value *&Chunk) const {}
+
   /// Emits call of the outlined function with the provided arguments,
   /// translating these arguments to correct target-specific arguments.
   virtual void
index 45aafaa5c3cd36be39860db233f27dffdcefeacb..56b244d0ae8802c090fb0c7999d26f8f3ba5faaf 100644 (file)
@@ -4081,3 +4081,15 @@ void CGOpenMPRuntimeNVPTX::functionFinished(CodeGenFunction &CGF) {
   FunctionGlobalizedDecls.erase(CGF.CurFn);
   CGOpenMPRuntime::functionFinished(CGF);
 }
+
+void CGOpenMPRuntimeNVPTX::getDefaultDistScheduleAndChunk(
+    CodeGenFunction &CGF, const OMPLoopDirective &S,
+    OpenMPDistScheduleClauseKind &ScheduleKind,
+    llvm::Value *&Chunk) const {
+  if (getExecutionMode() == CGOpenMPRuntimeNVPTX::EM_SPMD) {
+    ScheduleKind = OMPC_DIST_SCHEDULE_static;
+    Chunk = CGF.EmitScalarConversion(getNVPTXNumThreads(CGF),
+        CGF.getContext().getIntTypeForBitwidth(32, /*Signed=*/0),
+        S.getIterationVariable()->getType(), S.getBeginLoc());
+  }
+}
index 85ed838d473d4af3744fd21fff3847670448c904..76343dfc7f5a9e28bb21fe5d1a8110d591a9c490 100644 (file)
@@ -340,6 +340,11 @@ public:
   ///
   void functionFinished(CodeGenFunction &CGF) override;
 
+  /// Choose a default value for the schedule clause.
+  void getDefaultDistScheduleAndChunk(CodeGenFunction &CGF,
+      const OMPLoopDirective &S, OpenMPDistScheduleClauseKind &ScheduleKind,
+      llvm::Value *&Chunk) const override;
+
 private:
   /// Track the execution mode when codegening directives within a target
   /// region. The appropriate mode (SPMD/NON-SPMD) is set on entry to the
index 7305b0f3213ec4c3cca16768b8866f12238f59ce..4bafb8ba9fc9c51b6afe7db666923b1e5473832b 100644 (file)
@@ -3325,6 +3325,10 @@ void CodeGenFunction::EmitOMPDistributeLoop(const OMPLoopDirective &S,
                                        S.getIterationVariable()->getType(),
                                        S.getBeginLoc());
         }
+      } else {
+        // Default behaviour for dist_schedule clause.
+        CGM.getOpenMPRuntime().getDefaultDistScheduleAndChunk(
+            *this, S, ScheduleKind, Chunk);
       }
       const unsigned IVSize = getContext().getTypeSize(IVExpr->getType());
       const bool IVSigned = IVExpr->getType()->hasSignedIntegerRepresentation();
index 2fdcbe683d5335bbde914778545324c42de9180a..19993351198f83169fde2c1a97688114a9ecf4a8 100644 (file)
@@ -35,7 +35,7 @@ tx ftemplate(int n) {
     l = i;
   }
 
-  #pragma omp target teams distribute parallel for map(tofrom: aa) num_teams(M) thread_limit(64)
+#pragma omp target teams distribute parallel for map(tofrom: aa) num_teams(M) thread_limit(64)
   for(int i = 0; i < n; i++) {
     aa[i] += 1;
   }
@@ -87,7 +87,7 @@ int bar(int n){
 // CHECK-LABEL: define {{.*}}void {{@__omp_offloading_.+}}(
 // CHECK-DAG: [[THREAD_LIMIT:%.+]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x()
 // CHECK: call void @__kmpc_spmd_kernel_init(i32 [[THREAD_LIMIT]], i16 0, i16 0)
-// CHECK: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, {{.+}} 92,
+// CHECK: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, {{.+}} 91,
 // CHECK: {{call|invoke}} void [[OUTL2:@.+]](
 // CHECK: call void @__kmpc_for_static_fini(
 // CHECK: call void @__kmpc_spmd_kernel_deinit()
@@ -101,7 +101,7 @@ int bar(int n){
 // CHECK-LABEL: define {{.*}}void {{@__omp_offloading_.+}}(
 // CHECK-DAG: [[THREAD_LIMIT:%.+]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x()
 // CHECK: call void @__kmpc_spmd_kernel_init(i32 [[THREAD_LIMIT]], i16 0, i16 0)
-// CHECK: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, {{.+}} 92,
+// CHECK: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, {{.+}} 91,
 // CHECK: {{call|invoke}} void [[OUTL3:@.+]](
 // CHECK: call void @__kmpc_for_static_fini(
 // CHECK: call void @__kmpc_spmd_kernel_deinit()
@@ -117,7 +117,7 @@ int bar(int n){
 // CHECK-DAG: [[THREAD_LIMIT:%.+]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x()
 // CHECK: call void @__kmpc_spmd_kernel_init(i32 [[THREAD_LIMIT]], i16 0, i16 0)
 // CHECK: store {{.+}} 99, {{.+}}* [[COMB_UB:%.+]], align
-// CHECK: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, {{.+}} 92, {{.+}}, {{.+}}, {{.+}}* [[COMB_UB]],
+// CHECK: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, {{.+}} 91, {{.+}}, {{.+}}, {{.+}}* [[COMB_UB]],
 // CHECK: {{call|invoke}} void [[OUTL4:@.+]](
 // CHECK: call void @__kmpc_for_static_fini(
 // CHECK: call void @__kmpc_spmd_kernel_deinit()
index 0f0f12c554b19559e0497569958126cc04f610c3..92436c40f46832e04f695a1ca1119dba602442b4 100644 (file)
@@ -33,7 +33,7 @@ tx ftemplate(int n) {
     l = i;
   }
 
 #pragma omp target teams distribute parallel for simd map(tofrom: aa) num_teams(M) thread_limit(64)
+ #pragma omp target teams distribute parallel for simd map(tofrom: aa) num_teams(M) thread_limit(64)
   for(int i = 0; i < n; i++) {
     aa[i] += 1;
   }
@@ -82,7 +82,7 @@ int bar(int n){
 // CHECK-LABEL: define {{.*}}void {{@__omp_offloading_.+}}(
 // CHECK-DAG: [[THREAD_LIMIT:%.+]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x()
 // CHECK: call void @__kmpc_spmd_kernel_init(i32 [[THREAD_LIMIT]], i16 0, i16 0)
-// CHECK: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, {{.+}} 92,
+// CHECK: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, {{.+}} 91,
 // CHECK: {{call|invoke}} void [[OUTL2:@.+]](
 // CHECK: call void @__kmpc_for_static_fini(
 // CHECK: call void @__kmpc_spmd_kernel_deinit()
@@ -96,7 +96,7 @@ int bar(int n){
 // CHECK-LABEL: define {{.*}}void {{@__omp_offloading_.+}}(
 // CHECK-DAG: [[THREAD_LIMIT:%.+]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x()
 // CHECK: call void @__kmpc_spmd_kernel_init(i32 [[THREAD_LIMIT]], i16 0, i16 0)
-// CHECK: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, {{.+}} 92,
+// CHECK: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, {{.+}} 91,
 // CHECK: {{call|invoke}} void [[OUTL3:@.+]](
 // CHECK: call void @__kmpc_for_static_fini(
 // CHECK: call void @__kmpc_spmd_kernel_deinit()
@@ -112,7 +112,7 @@ int bar(int n){
 // CHECK-DAG: [[THREAD_LIMIT:%.+]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x()
 // CHECK: call void @__kmpc_spmd_kernel_init(i32 [[THREAD_LIMIT]], i16 0, i16 0)
 // CHECK: store {{.+}} 99, {{.+}}* [[COMB_UB:%.+]], align
-// CHECK: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, {{.+}} 92, {{.+}}, {{.+}}, {{.+}}* [[COMB_UB]],
+// CHECK: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, {{.+}} 91, {{.+}}, {{.+}}, {{.+}}* [[COMB_UB]],
 // CHECK: {{call|invoke}} void [[OUTL4:@.+]](
 // CHECK: call void @__kmpc_for_static_fini(
 // CHECK: call void @__kmpc_spmd_kernel_deinit()