From 6727779002a03fed12a7303f69fad1825150b565 Mon Sep 17 00:00:00 2001
From: Alexey Bataev <a.bataev@hotmail.com>
Date: Mon, 23 Apr 2018 17:33:41 +0000
Subject: [PATCH] [OPENMP] Do not cast captured by value variables with pointer
 types in NVPTX target.

When generating the wrapper function for the offloading region, we need
to call the outlined function and cast the arguments correctly to follow
the ABI. Usually, variables captured by value are casted to `uintptr_t`
type. But this should not performed for the variables with pointer type.

git-svn-id: https://llvm.org/svn/llvm-project/cfe/trunk@330620 91177308-0d34-0410-b5e6-96231b3b80d8
---
 lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp            |  3 ++-
 ...et_teams_distribute_parallel_for_codegen.cpp | 17 +++++++++++++----
 2 files changed, 15 insertions(+), 5 deletions(-)

diff --git a/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp b/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp
index 7abe2d741a..59858d3430 100644
--- a/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp
+++ b/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp
@@ -3037,7 +3037,8 @@ llvm::Function *CGOpenMPRuntimeNVPTX::createParallelDataSharingWrapper(
                                               /*Volatile=*/false,
                                               CGFContext.getPointerType(ElemTy),
                                               CI->getLocation());
-      if (CI->capturesVariableByCopy()) {
+      if (CI->capturesVariableByCopy() &&
+          !CI->getCapturedVar()->getType()->isAnyPointerType()) {
         Arg = castValueToType(CGF, Arg, ElemTy, CGFContext.getUIntPtrType(),
                               CI->getLocation());
       }
diff --git a/test/OpenMP/nvptx_target_teams_distribute_parallel_for_codegen.cpp b/test/OpenMP/nvptx_target_teams_distribute_parallel_for_codegen.cpp
index 73647c4112..124766e1f0 100644
--- a/test/OpenMP/nvptx_target_teams_distribute_parallel_for_codegen.cpp
+++ b/test/OpenMP/nvptx_target_teams_distribute_parallel_for_codegen.cpp
@@ -9,10 +9,11 @@
 #define HEADER
 
 // Check that the execution mode of all 2 target regions on the gpu is set to SPMD Mode.
-// CHECK-DAG: {{@__omp_offloading_.+l30}}_exec_mode = weak constant i8 0
-// CHECK-DAG: {{@__omp_offloading_.+l36}}_exec_mode = weak constant i8 0
-// CHECK-DAG: {{@__omp_offloading_.+l41}}_exec_mode = weak constant i8 0
-// CHECK-DAG: {{@__omp_offloading_.+l46}}_exec_mode = weak constant i8 0
+// CHECK-DAG: {{@__omp_offloading_.+l32}}_exec_mode = weak constant i8 0
+// CHECK-DAG: {{@__omp_offloading_.+l38}}_exec_mode = weak constant i8 0
+// CHECK-DAG: {{@__omp_offloading_.+l43}}_exec_mode = weak constant i8 0
+// CHECK-DAG: {{@__omp_offloading_.+l48}}_exec_mode = weak constant i8 0
+// CHECK-DAG: {{@__omp_offloading_.+l56}}_exec_mode = weak constant i8 0
 
 #define N 1000
 #define M 10
@@ -26,6 +27,7 @@ tx ftemplate(int n) {
   tx f = n;
   tx l;
   int k;
+  tx *v;
 
 #pragma omp target teams distribute parallel for lastprivate(l) dist_schedule(static,128) schedule(static,32)
   for(int i = 0; i < n; i++) {
@@ -51,6 +53,9 @@ tx ftemplate(int n) {
     }
   }
 
+#pragma omp target teams distribute parallel for map(a, v[:N])
+  for(int i = 0; i < n; i++)
+    a[i] = v[i];
   return a[0];
 }
 
@@ -120,4 +125,8 @@ int bar(int n){
 // CHECK: call void @__kmpc_for_static_fini(
 // CHECK: ret void
 
+// CHECK: define void @__omp_offloading_{{.*}}_l56(i[[SZ:64|32]] %{{[^,]+}}, [1000 x i32]* dereferenceable{{.*}}, i32* %{{[^)]+}})
+// CHECK: call void [[OUTLINED:@__omp_outlined.*]](i32* %{{.+}}, i32* %{{.+}}, i[[SZ]] %{{.*}}, i[[SZ]] %{{.*}}, i[[SZ]] %{{.*}}, [1000 x i32]* %{{.*}}, i32* %{{.*}})
+// CHECK: define internal void [[OUTLINED]](i32* noalias %{{.*}}, i32* noalias %{{.*}} i[[SZ]] %{{.+}}, i[[SZ]] %{{.+}}, i[[SZ]] %{{.+}}, [1000 x i32]* dereferenceable{{.*}}, i32* %{{.*}})
+
 #endif
-- 
2.40.0