///
/// Use this rather than examining the function's attributes yourself -- you
/// will get it wrong. Returns CFT_Host if D is null.
- CUDAFunctionTarget IdentifyCUDATarget(const FunctionDecl *D);
+ CUDAFunctionTarget IdentifyCUDATarget(const FunctionDecl *D,
+ bool IgnoreImplicitHDAttr = false);
CUDAFunctionTarget IdentifyCUDATarget(const AttributeList *Attr);
/// Gets the CUDA target for the current context.
/// Check whether NewFD is a valid overload for CUDA. Emits
/// diagnostics and invalidates NewFD if not.
- void checkCUDATargetOverload(FunctionDecl *NewFD, LookupResult &Previous);
+ void checkCUDATargetOverload(FunctionDecl *NewFD,
+ const LookupResult &Previous);
+ /// Copies target attributes from the template TD to the function FD.
+ void inheritCUDATargetAttrs(FunctionDecl *FD, const FunctionTemplateDecl &TD);
/// \name Code completion
//@{
return CFT_Host;
}
+template <typename A>
+static bool hasAttr(const FunctionDecl *D, bool IgnoreImplicitAttr) {
+ return D->hasAttrs() && llvm::any_of(D->getAttrs(), [&](Attr *Attribute) {
+ return isa<A>(Attribute) &&
+ !(IgnoreImplicitAttr && Attribute->isImplicit());
+ });
+}
+
/// IdentifyCUDATarget - Determine the CUDA compilation target for this function
-Sema::CUDAFunctionTarget Sema::IdentifyCUDATarget(const FunctionDecl *D) {
+Sema::CUDAFunctionTarget Sema::IdentifyCUDATarget(const FunctionDecl *D,
+ bool IgnoreImplicitHDAttr) {
// Code that lives outside a function is run on the host.
if (D == nullptr)
return CFT_Host;
if (D->hasAttr<CUDAGlobalAttr>())
return CFT_Global;
- if (D->hasAttr<CUDADeviceAttr>()) {
- if (D->hasAttr<CUDAHostAttr>())
+ if (hasAttr<CUDADeviceAttr>(D, IgnoreImplicitHDAttr)) {
+ if (hasAttr<CUDAHostAttr>(D, IgnoreImplicitHDAttr))
return CFT_HostDevice;
return CFT_Device;
- } else if (D->hasAttr<CUDAHostAttr>()) {
+ } else if (hasAttr<CUDAHostAttr>(D, IgnoreImplicitHDAttr)) {
return CFT_Host;
- } else if (D->isImplicit()) {
+ } else if (D->isImplicit() && !IgnoreImplicitHDAttr) {
// Some implicit declarations (like intrinsic functions) are not marked.
// Set the most lenient target on them for maximal flexibility.
return CFT_HostDevice;
}
void Sema::checkCUDATargetOverload(FunctionDecl *NewFD,
- LookupResult &Previous) {
+ const LookupResult &Previous) {
assert(getLangOpts().CUDA && "Should only be called during CUDA compilation");
CUDAFunctionTarget NewTarget = IdentifyCUDATarget(NewFD);
for (NamedDecl *OldND : Previous) {
}
}
}
+
+template <typename AttrTy>
+static void copyAttrIfPresent(Sema &S, FunctionDecl *FD,
+ const FunctionDecl &TemplateFD) {
+ if (AttrTy *Attribute = TemplateFD.getAttr<AttrTy>()) {
+ AttrTy *Clone = Attribute->clone(S.Context);
+ Clone->setInherited(true);
+ FD->addAttr(Clone);
+ }
+}
+
+void Sema::inheritCUDATargetAttrs(FunctionDecl *FD,
+ const FunctionTemplateDecl &TD) {
+ const FunctionDecl &TemplateFD = *TD.getTemplatedDecl();
+ copyAttrIfPresent<CUDAGlobalAttr>(*this, FD, TemplateFD);
+ copyAttrIfPresent<CUDAHostAttr>(*this, FD, TemplateFD);
+ copyAttrIfPresent<CUDADeviceAttr>(*this, FD, TemplateFD);
+}
// Handle attributes.
ProcessDeclAttributes(S, NewFD, D);
- if (getLangOpts().CUDA)
- maybeAddCUDAHostDeviceAttrs(NewFD, Previous);
-
if (getLangOpts().OpenCL) {
// OpenCL v1.1 s6.5: Using an address space qualifier in a function return
// type declaration will generate a compilation error.
TemplateArgs.setRAngleLoc(D.getIdentifierLoc());
}
+ // We do not add HD attributes to specializations here because
+ // they may have different constexpr-ness compared to their
+ // templates and, after maybeAddCUDAHostDeviceAttrs() is applied,
+ // may end up with different effective targets. Instead, a
+ // specialization inherits its target attributes from its template
+ // in the CheckFunctionTemplateSpecialization() call below.
+ if (getLangOpts().CUDA & !isFunctionTemplateSpecialization)
+ maybeAddCUDAHostDeviceAttrs(NewFD, Previous);
+
// If it's a friend (and only if it's a friend), it's possible
// that either the specialized function type or the specialized
// template is dependent, and therefore matching will fail. In
continue;
}
- // Target attributes are part of function signature during cuda
- // compilation, so deduced template must also have matching CUDA
- // target. Given that regular template deduction does not take
- // target attributes into account, we perform target match check
- // here and reject candidates that have different target.
+ // Target attributes are part of the cuda function signature, so
+ // the deduced template's cuda target must match that of the
+ // specialization. Given that C++ template deduction does not
+ // take target attributes into account, we reject candidates
+ // here that have a different target.
if (LangOpts.CUDA &&
- IdentifyCUDATarget(Specialization) != IdentifyCUDATarget(FD)) {
+ IdentifyCUDATarget(Specialization,
+ /* IgnoreImplicitHDAttributes = */ true) !=
+ IdentifyCUDATarget(FD, /* IgnoreImplicitHDAttributes = */ true)) {
FailedCandidates.addCandidate().set(
I.getPair(), FunTmpl->getTemplatedDecl(),
MakeDeductionFailureInfo(Context, TDK_CUDATargetMismatch, Info));
SpecInfo->getTemplateSpecializationKind(),
ExplicitTemplateArgs ? &ConvertedTemplateArgs[Specialization] : nullptr);
+ // A function template specialization inherits the target attributes
+ // of its template. (We require the attributes explicitly in the
+ // code to match, but a template may have implicit attributes by
+ // virtue e.g. of being constexpr, and it passes these implicit
+ // attributes on to its specializations.)
+ if (LangOpts.CUDA)
+ inheritCUDATargetAttrs(FD, *Specialization->getPrimaryTemplate());
+
// The "previous declaration" for this function template specialization is
// the prior function template specialization.
Previous.clear();
continue;
}
- // Target attributes are part of function signature during cuda
- // compilation, so deduced template must also have matching CUDA
- // target. Given that regular template deduction does not take it
- // into account, we perform target match check here and reject
- // candidates that have different target.
- if (LangOpts.CUDA) {
- CUDAFunctionTarget DeclaratorTarget = IdentifyCUDATarget(Attr);
- // We need to adjust target when HD is forced by
- // #pragma clang force_cuda_host_device
- if (ForceCUDAHostDeviceDepth > 0 &&
- (DeclaratorTarget == CFT_Device || DeclaratorTarget == CFT_Host))
- DeclaratorTarget = CFT_HostDevice;
- if (IdentifyCUDATarget(Specialization) != DeclaratorTarget) {
- FailedCandidates.addCandidate().set(
- P.getPair(), FunTmpl->getTemplatedDecl(),
- MakeDeductionFailureInfo(Context, TDK_CUDATargetMismatch, Info));
- continue;
- }
+ // Target attributes are part of the cuda function signature, so
+ // the cuda target of the instantiated function must match that of its
+ // template. Given that C++ template deduction does not take
+ // target attributes into account, we reject candidates here that
+ // have a different target.
+ if (LangOpts.CUDA &&
+ IdentifyCUDATarget(Specialization,
+ /* IgnoreImplicitHDAttributes = */ true) !=
+ IdentifyCUDATarget(Attr)) {
+ FailedCandidates.addCandidate().set(
+ P.getPair(), FunTmpl->getTemplatedDecl(),
+ MakeDeductionFailureInfo(Context, TDK_CUDATargetMismatch, Info));
+ continue;
}
Matches.addDecl(Specialization, P.getAccess());
template <> __host__ HType overload_h_d(long a); // OK. instantiates H
-// Can't overload HD template with H or D template, though functions are OK.
+// Can't overload HD template with H or D template, though
+// non-template functions are OK.
template <typename T> __host__ __device__ HDType overload_hd(T a) { return HDType(); }
// expected-note@-1 {{previous declaration is here}}
// expected-note@-2 2 {{candidate template ignored: could not match 'HDType' against 'HType'}}
template <typename T> __host__ __device__ HDType overload_h_d2(T a) { return HDType(); }
template <typename T1, typename T2 = int> __device__ DType overload_h_d2(T1 a) { T1 x; T2 y; return DType(); }
+// constexpr functions are implicitly HD, but explicit
+// instantiation/specialization must use target attributes as written.
+template <typename T> constexpr T overload_ce_implicit_hd(T a) { return a+1; }
+// expected-note@-1 3 {{candidate template ignored: target attributes do not match}}
+
+// These will not match the template.
+template __host__ __device__ int overload_ce_implicit_hd(int a);
+// expected-error@-1 {{explicit instantiation of 'overload_ce_implicit_hd' does not refer to a function template, variable template, member function, member class, or static data member}}
+template <> __host__ __device__ long overload_ce_implicit_hd(long a);
+// expected-error@-1 {{no function template matches function template specialization 'overload_ce_implicit_hd'}}
+template <> __host__ __device__ constexpr long overload_ce_implicit_hd(long a);
+// expected-error@-1 {{no function template matches function template specialization 'overload_ce_implicit_hd'}}
+
+// These should work, because template matching ignores the implicit
+// HD attributes the compiler gives to constexpr functions/templates,
+// so 'overload_ce_implicit_hd' template will match __host__ functions
+// only.
+template __host__ int overload_ce_implicit_hd(int a);
+template <> __host__ long overload_ce_implicit_hd(long a);
+
+template float overload_ce_implicit_hd(float a);
+template <> float* overload_ce_implicit_hd(float *a);
+template <> constexpr double overload_ce_implicit_hd(double a) { return a + 3.0; };
+
__host__ void hf() {
overload_hd(13);
+ overload_ce_implicit_hd('h'); // Implicitly instantiated
+ overload_ce_implicit_hd(1.0f); // Explicitly instantiated
+ overload_ce_implicit_hd(2.0); // Explicitly specialized
HType h = overload_h_d(10);
HType h2i = overload_h_d2<int>(11);
HType h2ii = overload_h_d2<int>(12);
// These should be implicitly instantiated from __host__ template returning HType.
- DType d = overload_h_d(20); // expected-error {{no viable conversion from 'HType' to 'DType'}}
- DType d2i = overload_h_d2<int>(21); // expected-error {{no viable conversion from 'HType' to 'DType'}}
+ DType d = overload_h_d(20); // expected-error {{no viable conversion from 'HType' to 'DType'}}
+ DType d2i = overload_h_d2<int>(21); // expected-error {{no viable conversion from 'HType' to 'DType'}}
DType d2ii = overload_h_d2<int>(22); // expected-error {{no viable conversion from 'HType' to 'DType'}}
}
__device__ void df() {
overload_hd(23);
+ overload_ce_implicit_hd('d'); // Implicitly instantiated
+ overload_ce_implicit_hd(1.0f); // Explicitly instantiated
+ overload_ce_implicit_hd(2.0); // Explicitly specialized
// These should be implicitly instantiated from __device__ template returning DType.
- HType h = overload_h_d(10); // expected-error {{no viable conversion from 'DType' to 'HType'}}
- HType h2i = overload_h_d2<int>(11); // expected-error {{no viable conversion from 'DType' to 'HType'}}
+ HType h = overload_h_d(10); // expected-error {{no viable conversion from 'DType' to 'HType'}}
+ HType h2i = overload_h_d2<int>(11); // expected-error {{no viable conversion from 'DType' to 'HType'}}
HType h2ii = overload_h_d2<int>(12); // expected-error {{no viable conversion from 'DType' to 'HType'}}
DType d = overload_h_d(20);