Summary:
This just seems to have been an oversight. We already supported the f64
atomic add with an explicit scope (e.g. "cta"), but not the scopeless
version.
Reviewers: tra
Subscribers: jholewinski, sanjoy, cfe-commits, llvm-commits, hiraditya
Differential Revision: https://reviews.llvm.org/D39638
git-svn-id: https://llvm.org/svn/llvm-project/cfe/trunk@317623
91177308-0d34-0410-b5e6-
96231b3b80d8
TARGET_BUILTIN(__nvvm_atom_sys_add_gen_f, "ffD*f", "n", "satom")
BUILTIN(__nvvm_atom_add_g_d, "ddD*1d", "n")
BUILTIN(__nvvm_atom_add_s_d, "ddD*3d", "n")
-BUILTIN(__nvvm_atom_add_gen_d, "ddD*d", "n")
+TARGET_BUILTIN(__nvvm_atom_add_gen_d, "ddD*d", "n", "satom")
TARGET_BUILTIN(__nvvm_atom_cta_add_gen_d, "ddD*d", "n", "satom")
TARGET_BUILTIN(__nvvm_atom_sys_add_gen_d, "ddD*d", "n", "satom")
return Builder.CreateCall(FnALAF32, {Ptr, Val});
}
+ case NVPTX::BI__nvvm_atom_add_gen_d: {
+ Value *Ptr = EmitScalarExpr(E->getArg(0));
+ Value *Val = EmitScalarExpr(E->getArg(1));
+ // atomicrmw only deals with integer arguments, so we need to use
+ // LLVM's nvvm_atomic_load_add_f64 intrinsic.
+ Value *FnALAF64 =
+ CGM.getIntrinsic(Intrinsic::nvvm_atomic_load_add_f64, Ptr->getType());
+ return Builder.CreateCall(FnALAF64, {Ptr, Val});
+ }
+
case NVPTX::BI__nvvm_atom_inc_gen_ui: {
Value *Ptr = EmitScalarExpr(E->getArg(0));
Value *Val = EmitScalarExpr(E->getArg(1));
--- /dev/null
+// RUN: %clang_cc1 -triple nvptx64-unknown-unknown -target-cpu sm_60 \
+// RUN: -fcuda-is-device -S -emit-llvm -o - -x cuda %s \
+// RUN: | FileCheck -check-prefix=CHECK %s
+//
+// RUN: %clang_cc1 -triple nvptx-unknown-unknown -target-cpu sm_50 \
+// RUN: -fcuda-is-device -S -o /dev/null -x cuda -verify %s
+
+#define __device__ __attribute__((device))
+#define __global__ __attribute__((global))
+#define __shared__ __attribute__((shared))
+#define __constant__ __attribute__((constant))
+
+// We have to keep all builtins that depend on particular target feature in the
+// same function, because the codegen will stop after the very first function
+// that encounters an error, so -verify will not be able to find errors in
+// subsequent functions.
+
+// CHECK-LABEL: test_fn
+__device__ void test_fn(double d, double* double_ptr) {
+ // CHECK: call double @llvm.nvvm.atomic.load.add.f64.p0f64
+ // expected-error@+1 {{'__nvvm_atom_add_gen_d' needs target feature satom}}
+ __nvvm_atom_add_gen_d(double_ptr, d);
+}