[CUDA] Add implicit __attribute__((used)) to all __global__ functions.

author Artem Belevich <tra@google.com>

Mon, 10 Aug 2015 20:57:02 +0000 (20:57 +0000)

committer Artem Belevich <tra@google.com>

Mon, 10 Aug 2015 20:57:02 +0000 (20:57 +0000)
author Artem Belevich <tra@google.com>
Mon, 10 Aug 2015 20:57:02 +0000 (20:57 +0000)
committer Artem Belevich <tra@google.com>
Mon, 10 Aug 2015 20:57:02 +0000 (20:57 +0000)
diff --git a/lib/Sema/SemaDeclAttr.cpp b/lib/Sema/SemaDeclAttr.cpp

index 31ddd503af9fdd3e4c7eeb3cc14850bf231e1bcb..8e04b69f81a7f17b6545d008556e30d94bb16e6e 100644 (file)
--- a/lib/Sema/SemaDeclAttr.cpp
+++ b/lib/Sema/SemaDeclAttr.cpp
@@ -3350,6 +3350,10 @@ static void handleGlobalAttr(Sema &S, Decl *D, const AttributeList &Attr) {
    D->addAttr(::new (S.Context)
                CUDAGlobalAttr(Attr.getRange(), S.Context,
                               Attr.getAttributeSpellingListIndex()));
+
+  // Add implicit __attribute__((used)) so we don't eliminate kernels
+  // because there is nothing referencing them on the device side.
+  D->addAttr(UsedAttr::CreateImplicit(S.Context));
  }
  
  static void handleGNUInlineAttr(Sema &S, Decl *D, const AttributeList &Attr) {
diff --git a/test/CodeGenCUDA/ptx-kernels.cu b/test/CodeGenCUDA/ptx-kernels.cu

index 658b3488fc18dc463e89a3619dfddf5d798fa72e..bf3b14d3a2867378359ce9ec4f290978c1358353 100644 (file)
--- a/test/CodeGenCUDA/ptx-kernels.cu
+++ b/test/CodeGenCUDA/ptx-kernels.cu
@@ -1,7 +1,16 @@
+// Make sure that __global__ functions are emitted along with correct
+// annotations and are added to @llvm.used to prevent their elimination.
+// REQUIRES: nvptx-registered-target
+//
  // RUN: %clang_cc1 %s -triple nvptx-unknown-unknown -fcuda-is-device -emit-llvm -o - | FileCheck %s
  
  #include "Inputs/cuda.h"
  
+// Make sure that all __global__ functions are added to @llvm.used
+// CHECK: @llvm.used = appending global
+// CHECK-SAME: @global_function
+// CHECK-SAME: @_Z16templated_kernelIiEvT_
+
  // CHECK-LABEL: define void @device_function
  extern "C"
  __device__ void device_function() {}
@@ -13,4 +22,10 @@ __global__ void global_function() {
    device_function();
  }
  
+// Make sure host-instantiated kernels are preserved on device side.
+template <typename T> __global__ void templated_kernel(T param) {}
+// CHECK-LABEL: define linkonce_odr void @_Z16templated_kernelIiEvT_
+void host_function() { templated_kernel<<<0,0>>>(0); }
+
  // CHECK: !{{[0-9]+}} = !{void ()* @global_function, !"kernel", i32 1}
+// CHECK: !{{[0-9]+}} = !{void (i32)* @_Z16templated_kernelIiEvT_, !"kernel", i32 1}
author	Artem Belevich <tra@google.com>
	Mon, 10 Aug 2015 20:57:02 +0000 (20:57 +0000)
committer	Artem Belevich <tra@google.com>
	Mon, 10 Aug 2015 20:57:02 +0000 (20:57 +0000)
lib/Sema/SemaDeclAttr.cpp		patch \| blob \| history
test/CodeGenCUDA/ptx-kernels.cu		patch \| blob \| history