[NVPTX] Implement __nvvm_atom_add_gen_d builtin.

author Justin Lebar <jlebar@google.com>

Tue, 7 Nov 2017 22:10:54 +0000 (22:10 +0000)

committer Justin Lebar <jlebar@google.com>

Tue, 7 Nov 2017 22:10:54 +0000 (22:10 +0000)
author Justin Lebar <jlebar@google.com>
Tue, 7 Nov 2017 22:10:54 +0000 (22:10 +0000)
committer Justin Lebar <jlebar@google.com>
Tue, 7 Nov 2017 22:10:54 +0000 (22:10 +0000)
diff --git a/include/clang/Basic/BuiltinsNVPTX.def b/include/clang/Basic/BuiltinsNVPTX.def

index caa860480f7e1edbf102d52d7c0e4b3e0ce38cb3..b596793c9c12854e22dcc1aa4a4f51c98c8eec56 100644 (file)
--- a/include/clang/Basic/BuiltinsNVPTX.def
+++ b/include/clang/Basic/BuiltinsNVPTX.def
@@ -481,7 +481,7 @@ TARGET_BUILTIN(__nvvm_atom_cta_add_gen_f, "ffD*f", "n", "satom")
  TARGET_BUILTIN(__nvvm_atom_sys_add_gen_f, "ffD*f", "n", "satom")
  BUILTIN(__nvvm_atom_add_g_d, "ddD*1d", "n")
  BUILTIN(__nvvm_atom_add_s_d, "ddD*3d", "n")
-BUILTIN(__nvvm_atom_add_gen_d, "ddD*d", "n")
+TARGET_BUILTIN(__nvvm_atom_add_gen_d, "ddD*d", "n", "satom")
  TARGET_BUILTIN(__nvvm_atom_cta_add_gen_d, "ddD*d", "n", "satom")
  TARGET_BUILTIN(__nvvm_atom_sys_add_gen_d, "ddD*d", "n", "satom")
  
diff --git a/lib/CodeGen/CGBuiltin.cpp b/lib/CodeGen/CGBuiltin.cpp

index f1b8e2e611097b797a17dfeec4137a985edeadd2..369240f316c2b620f429503a50fecbbadbf1c4d5 100644 (file)
--- a/lib/CodeGen/CGBuiltin.cpp
+++ b/lib/CodeGen/CGBuiltin.cpp
@@ -9554,6 +9554,16 @@ Value *CodeGenFunction::EmitNVPTXBuiltinExpr(unsigned BuiltinID,
      return Builder.CreateCall(FnALAF32, {Ptr, Val});
    }
  
+  case NVPTX::BI__nvvm_atom_add_gen_d: {
+    Value *Ptr = EmitScalarExpr(E->getArg(0));
+    Value *Val = EmitScalarExpr(E->getArg(1));
+    // atomicrmw only deals with integer arguments, so we need to use
+    // LLVM's nvvm_atomic_load_add_f64 intrinsic.
+    Value *FnALAF64 =
+        CGM.getIntrinsic(Intrinsic::nvvm_atomic_load_add_f64, Ptr->getType());
+    return Builder.CreateCall(FnALAF64, {Ptr, Val});
+  }
+
    case NVPTX::BI__nvvm_atom_inc_gen_ui: {
      Value *Ptr = EmitScalarExpr(E->getArg(0));
      Value *Val = EmitScalarExpr(E->getArg(1));
diff --git a/test/CodeGen/builtins-nvptx-ptx50.cu b/test/CodeGen/builtins-nvptx-ptx50.cu

new file mode 100644 (file)

index 0000000..e85be44
--- /dev/null
+++ b/test/CodeGen/builtins-nvptx-ptx50.cu
@@ -0,0 +1,23 @@
+// RUN: %clang_cc1 -triple nvptx64-unknown-unknown -target-cpu sm_60 \
+// RUN:            -fcuda-is-device -S -emit-llvm -o - -x cuda %s \
+// RUN:   | FileCheck -check-prefix=CHECK %s
+//
+// RUN: %clang_cc1 -triple nvptx-unknown-unknown -target-cpu sm_50 \
+// RUN:   -fcuda-is-device -S -o /dev/null -x cuda -verify %s
+
+#define __device__ __attribute__((device))
+#define __global__ __attribute__((global))
+#define __shared__ __attribute__((shared))
+#define __constant__ __attribute__((constant))
+
+// We have to keep all builtins that depend on a particular target feature in
+// the same function, because codegen stops after the very first function that
+// encounters an error, so -verify would not be able to find errors in any
+// subsequent functions.
+
+// CHECK-LABEL: test_fn
+__device__ void test_fn(double d, double* double_ptr) {
+  // CHECK: call double @llvm.nvvm.atomic.load.add.f64.p0f64
+  // expected-error@+1 {{'__nvvm_atom_add_gen_d' needs target feature satom}}
+  __nvvm_atom_add_gen_d(double_ptr, d);
+}
author	Justin Lebar <jlebar@google.com>
	Tue, 7 Nov 2017 22:10:54 +0000 (22:10 +0000)
committer	Justin Lebar <jlebar@google.com>
	Tue, 7 Nov 2017 22:10:54 +0000 (22:10 +0000)
include/clang/Basic/BuiltinsNVPTX.def		patch \| blob \| history
lib/CodeGen/CGBuiltin.cpp		patch \| blob \| history
test/CodeGen/builtins-nvptx-ptx50.cu	[new file with mode: 0644]	patch \| blob