[OpenMP][NVPTX][CUDA] Adding support for printf for an NVPTX OpenMP device.

author Arpith Chacko Jacob <acjacob@us.ibm.com>

Sun, 29 Jan 2017 20:49:31 +0000 (20:49 +0000)

committer Arpith Chacko Jacob <acjacob@us.ibm.com>

Sun, 29 Jan 2017 20:49:31 +0000 (20:49 +0000)
author Arpith Chacko Jacob <acjacob@us.ibm.com>
Sun, 29 Jan 2017 20:49:31 +0000 (20:49 +0000)
committer Arpith Chacko Jacob <acjacob@us.ibm.com>
Sun, 29 Jan 2017 20:49:31 +0000 (20:49 +0000)
diff --git a/lib/CodeGen/CGBuiltin.cpp b/lib/CodeGen/CGBuiltin.cpp

index b1feefe9f0de98536a8a7469c1be7d1432340999..c70a447303f49e9a101bfe1d1203ace33873c1e1 100644 (file)
--- a/lib/CodeGen/CGBuiltin.cpp
+++ b/lib/CodeGen/CGBuiltin.cpp
@@ -2620,8 +2620,8 @@ RValue CodeGenFunction::EmitBuiltinExpr(const FunctionDecl *FD,
          Arg));
    }
    case Builtin::BIprintf:
-    if (getLangOpts().CUDA && getLangOpts().CUDAIsDevice)
-      return EmitCUDADevicePrintfCallExpr(E, ReturnValue);
+    if (getTarget().getTriple().isNVPTX())
+      return EmitNVPTXDevicePrintfCallExpr(E, ReturnValue);
      break;
    case Builtin::BI__builtin_canonicalize:
    case Builtin::BI__builtin_canonicalizef:
diff --git a/lib/CodeGen/CGCUDABuiltin.cpp b/lib/CodeGen/CGGPUBuiltin.cpp

similarity index 91%

rename from lib/CodeGen/CGCUDABuiltin.cpp

rename to lib/CodeGen/CGGPUBuiltin.cpp

index 44dd003757adfeb5c58f4179fabcd0f8d192352b..48156b1b26b78f742546c7585ddf0bf787df0205 100644 (file)
--- a/lib/CodeGen/CGCUDABuiltin.cpp
+++ b/lib/CodeGen/CGGPUBuiltin.cpp
@@ -1,4 +1,4 @@
-//===----- CGCUDABuiltin.cpp - Codegen for CUDA builtins ------------------===//
+//===------ CGGPUBuiltin.cpp - Codegen for GPU builtins -------------------===//
  //
  //                     The LLVM Compiler Infrastructure
  //
@@ -7,8 +7,8 @@
  //
  //===----------------------------------------------------------------------===//
  //
-// Generates code for built-in CUDA calls which are not runtime-specific.
-// (Runtime-specific codegen lives in CGCUDARuntime.)
+// Generates code for built-in GPU calls which are not runtime-specific.
+// (Runtime-specific codegen lives in programming model specific files.)
  //
  //===----------------------------------------------------------------------===//
  
@@ -67,10 +67,9 @@ static llvm::Function *GetVprintfDeclaration(llvm::Module &M) {
  // Note that by the time this function runs, E's args have already undergone the
  // standard C vararg promotion (short -> int, float -> double, etc.).
  RValue
-CodeGenFunction::EmitCUDADevicePrintfCallExpr(const CallExpr *E,
-                                              ReturnValueSlot ReturnValue) {
-  assert(getLangOpts().CUDA);
-  assert(getLangOpts().CUDAIsDevice);
+CodeGenFunction::EmitNVPTXDevicePrintfCallExpr(const CallExpr *E,
+                                               ReturnValueSlot ReturnValue) {
+  assert(getTarget().getTriple().isNVPTX());
    assert(E->getBuiltinCallee() == Builtin::BIprintf);
    assert(E->getNumArgs() >= 1); // printf always has at least one arg.
  
diff --git a/lib/CodeGen/CMakeLists.txt b/lib/CodeGen/CMakeLists.txt

index 4fbf9f22a98db9c67084f7fc12d005a0c9a33b87..1e9a43af52297a7c1f221cfb58bbcb6f9883f95e 100644 (file)
--- a/lib/CodeGen/CMakeLists.txt
+++ b/lib/CodeGen/CMakeLists.txt
@@ -36,7 +36,6 @@ add_clang_library(clangCodeGen
    CGAtomic.cpp
    CGBlocks.cpp
    CGBuiltin.cpp
-  CGCUDABuiltin.cpp
    CGCUDANV.cpp
    CGCUDARuntime.cpp
    CGCXX.cpp
@@ -55,6 +54,7 @@ add_clang_library(clangCodeGen
    CGExprComplex.cpp
    CGExprConstant.cpp
    CGExprScalar.cpp
+  CGGPUBuiltin.cpp
    CGLoopInfo.cpp
    CGObjC.cpp
    CGObjCGNU.cpp
diff --git a/lib/CodeGen/CodeGenFunction.h b/lib/CodeGen/CodeGenFunction.h

index 8d7fd3f80bad17be12eab9e8de6c39cc75bc5aa7..098767e23e77f27719b290fd5d35eb4838aed308 100644 (file)
--- a/lib/CodeGen/CodeGenFunction.h
+++ b/lib/CodeGen/CodeGenFunction.h
@@ -3106,8 +3106,8 @@ public:
    RValue EmitCUDAKernelCallExpr(const CUDAKernelCallExpr *E,
                                  ReturnValueSlot ReturnValue);
  
-  RValue EmitCUDADevicePrintfCallExpr(const CallExpr *E,
-                                      ReturnValueSlot ReturnValue);
+  RValue EmitNVPTXDevicePrintfCallExpr(const CallExpr *E,
+                                       ReturnValueSlot ReturnValue);
  
    RValue EmitBuiltinExpr(const FunctionDecl *FD,
                           unsigned BuiltinID, const CallExpr *E,
diff --git a/test/OpenMP/nvptx_target_printf_codegen.c b/test/OpenMP/nvptx_target_printf_codegen.c

new file mode 100644 (file)

index 0000000..0de6885
--- /dev/null
+++ b/test/OpenMP/nvptx_target_printf_codegen.c
@@ -0,0 +1,116 @@
+// Test target codegen - host bc file has to be created first.
+// RUN: %clang_cc1 -verify -fopenmp -x c -triple powerpc64le-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm-bc %s -o %t-ppc-host.bc
+// RUN: %clang_cc1 -verify -fopenmp -x c -triple nvptx64-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-64
+// RUN: %clang_cc1 -verify -fopenmp -x c -triple i386-unknown-unknown -fopenmp-targets=nvptx-nvidia-cuda -emit-llvm-bc %s -o %t-x86-host.bc
+// RUN: %clang_cc1 -verify -fopenmp -x c -triple nvptx-unknown-unknown -fopenmp-targets=nvptx-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-32
+
+#include <stdarg.h>
+
+// expected-no-diagnostics
+extern int printf(const char *, ...);
+extern int vprintf(const char *, va_list);
+
+// Check a simple call to printf end-to-end.
+// CHECK: [[SIMPLE_PRINTF_TY:%[a-zA-Z0-9_]+]] = type { i32, i64, double }
+int CheckSimple() {
+    // CHECK: define {{.*}}void [[T1:@__omp_offloading_.+CheckSimple.+]]_worker()
+#pragma omp target
+  {
+    // Entry point.
+    // CHECK: define {{.*}}void [[T1]]()
+    // Alloca in entry block.
+    // CHECK: [[BUF:%[a-zA-Z0-9_]+]] = alloca [[SIMPLE_PRINTF_TY]]
+
+    // CHECK: {{call|invoke}} void [[T1]]_worker()
+    // CHECK: br label {{%?}}[[EXIT:.+]]
+    //
+    // CHECK-DAG: [[CMTID:%.+]] = call i32 @llvm.nvvm.read.ptx.sreg.tid.x()
+    // CHECK-DAG: [[CMNTH:%.+]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x()
+    // CHECK-DAG: [[CMWS:%.+]] = call i32 @llvm.nvvm.read.ptx.sreg.warpsize()
+    // CHECK: [[IS_MASTER:%.+]] = icmp eq i32 [[CMTID]],
+    // CHECK: br i1 [[IS_MASTER]], label {{%?}}[[MASTER:.+]], label {{%?}}[[EXIT]]
+    //
+    // CHECK: [[MASTER]]
+    // CHECK-DAG: [[MNTH:%.+]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x()
+    // CHECK-DAG: [[MWS:%.+]] = call i32 @llvm.nvvm.read.ptx.sreg.warpsize()
+    // CHECK: [[MTMP1:%.+]] = sub i32 [[MNTH]], [[MWS]]
+    // CHECK: call void @__kmpc_kernel_init(i32 [[MTMP1]]
+
+    // printf in master-only basic block.
+    // CHECK: [[FMT:%[0-9]+]] = load{{.*}}%fmt
+    const char* fmt = "%d %lld %f";
+    // CHECK: [[PTR0:%[0-9]+]] = getelementptr inbounds [[SIMPLE_PRINTF_TY]], [[SIMPLE_PRINTF_TY]]* [[BUF]], i32 0, i32 0
+    // CHECK: store i32 1, i32* [[PTR0]], align 4
+    // CHECK: [[PTR1:%[0-9]+]] = getelementptr inbounds [[SIMPLE_PRINTF_TY]], [[SIMPLE_PRINTF_TY]]* [[BUF]], i32 0, i32 1
+    // CHECK: store i64 2, i64* [[PTR1]], align 8
+    // CHECK: [[PTR2:%[0-9]+]] = getelementptr inbounds [[SIMPLE_PRINTF_TY]], [[SIMPLE_PRINTF_TY]]* [[BUF]], i32 0, i32 2
+
+    // CHECK: store double 3.0{{[^,]*}}, double* [[PTR2]], align 8
+    // CHECK: [[BUF_CAST:%[0-9]+]] = bitcast [[SIMPLE_PRINTF_TY]]* [[BUF]] to i8*
+    // CHECK: [[RET:%[0-9]+]] = call i32 @vprintf(i8* [[FMT]], i8* [[BUF_CAST]])
+    printf(fmt, 1, 2ll, 3.0);
+  }
+
+  return 0;
+}
+
+void CheckNoArgs() {
+    // CHECK: define {{.*}}void [[T2:@__omp_offloading_.+CheckNoArgs.+]]_worker()
+#pragma omp target
+  {
+    // Entry point.
+    // CHECK: define {{.*}}void [[T2]]()
+
+    // CHECK: {{call|invoke}} void [[T2]]_worker()
+    // CHECK: br label {{%?}}[[EXIT:.+]]
+    //
+    // CHECK-DAG: [[CMTID:%.+]] = call i32 @llvm.nvvm.read.ptx.sreg.tid.x()
+    // CHECK-DAG: [[CMNTH:%.+]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x()
+    // CHECK-DAG: [[CMWS:%.+]] = call i32 @llvm.nvvm.read.ptx.sreg.warpsize()
+    // CHECK: [[IS_MASTER:%.+]] = icmp eq i32 [[CMTID]],
+    // CHECK: br i1 [[IS_MASTER]], label {{%?}}[[MASTER:.+]], label {{%?}}[[EXIT]]
+    //
+    // CHECK: [[MASTER]]
+    // CHECK-DAG: [[MNTH:%.+]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x()
+    // CHECK-DAG: [[MWS:%.+]] = call i32 @llvm.nvvm.read.ptx.sreg.warpsize()
+    // CHECK: [[MTMP1:%.+]] = sub i32 [[MNTH]], [[MWS]]
+    // CHECK: call void @__kmpc_kernel_init(i32 [[MTMP1]]
+
+    // printf in master-only basic block.
+    // CHECK: call i32 @vprintf({{.*}}, i8* null){{$}}
+    printf("hello, world!");
+  }
+}
+
+// Check that printf's alloca happens in the entry block, not inside the if
+// statement.
+int foo;
+void CheckAllocaIsInEntryBlock() {
+    // CHECK: define {{.*}}void [[T3:@__omp_offloading_.+CheckAllocaIsInEntryBlock.+]]_worker()
+#pragma omp target
+  {
+    // Entry point.
+    // CHECK: define {{.*}}void [[T3]](
+    // Alloca in entry block.
+    // CHECK: alloca %printf_args
+
+    // CHECK: {{call|invoke}} void [[T3]]_worker()
+    // CHECK: br label {{%?}}[[EXIT:.+]]
+    //
+    // CHECK-DAG: [[CMTID:%.+]] = call i32 @llvm.nvvm.read.ptx.sreg.tid.x()
+    // CHECK-DAG: [[CMNTH:%.+]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x()
+    // CHECK-DAG: [[CMWS:%.+]] = call i32 @llvm.nvvm.read.ptx.sreg.warpsize()
+    // CHECK: [[IS_MASTER:%.+]] = icmp eq i32 [[CMTID]],
+    // CHECK: br i1 [[IS_MASTER]], label {{%?}}[[MASTER:.+]], label {{%?}}[[EXIT]]
+    //
+    // CHECK: [[MASTER]]
+    // CHECK-DAG: [[MNTH:%.+]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x()
+    // CHECK-DAG: [[MWS:%.+]] = call i32 @llvm.nvvm.read.ptx.sreg.warpsize()
+    // CHECK: [[MTMP1:%.+]] = sub i32 [[MNTH]], [[MWS]]
+    // CHECK: call void @__kmpc_kernel_init(i32 [[MTMP1]]
+
+    if (foo) {
+      printf("%d", 42);
+    }
+  }
+}
author	Arpith Chacko Jacob <acjacob@us.ibm.com>
	Sun, 29 Jan 2017 20:49:31 +0000 (20:49 +0000)
committer	Arpith Chacko Jacob <acjacob@us.ibm.com>
	Sun, 29 Jan 2017 20:49:31 +0000 (20:49 +0000)
lib/CodeGen/CGBuiltin.cpp		patch \| blob \| history
lib/CodeGen/CGGPUBuiltin.cpp	[moved from lib/CodeGen/CGCUDABuiltin.cpp with 91% similarity]	patch \| blob \| history
lib/CodeGen/CMakeLists.txt		patch \| blob \| history
lib/CodeGen/CodeGenFunction.h		patch \| blob \| history
test/OpenMP/nvptx_target_printf_codegen.c	[new file with mode: 0644]	patch \| blob