From 60741365b3dfa31f947a62bdcac6be76b9b5d7f0 Mon Sep 17 00:00:00 2001 From: Artem Belevich Date: Thu, 8 Dec 2016 19:38:13 +0000 Subject: [PATCH] [CUDA] Ignore implicit target attributes during function template instantiation. Some functions and templates are treated as __host__ __device__ even when they don't have explicitly specified target attributes. What's worse, this treatment may change depending on command line options (-fno-cuda-host-device-constexpr) or #pragma clang force_cuda_host_device. Combined with strict checking for matching function target that comes with D25809(r288962), it makes it hard to write code which would explicitly instantiate or specialize some functions regardless of pragmas or command line options in effect. This patch changes the way we match target attributes of base template vs attributes used in explicit instantiation or specialization so that only explicitly specified attributes are considered. This makes base template selection behave consistently regardless of pragma of command line options that may affect CUDA target. Differential Revision: https://reviews.llvm.org/D25845 git-svn-id: https://llvm.org/svn/llvm-project/cfe/trunk@289091 91177308-0d34-0410-b5e6-96231b3b80d8 --- include/clang/Sema/Sema.h | 8 +++- lib/Sema/SemaCUDA.cpp | 39 ++++++++++++--- lib/Sema/SemaDecl.cpp | 12 +++-- lib/Sema/SemaTemplate.cpp | 53 +++++++++++---------- test/SemaCUDA/function-template-overload.cu | 41 ++++++++++++++-- 5 files changed, 113 insertions(+), 40 deletions(-) diff --git a/include/clang/Sema/Sema.h b/include/clang/Sema/Sema.h index fcc0d53b2d..0523282e0a 100644 --- a/include/clang/Sema/Sema.h +++ b/include/clang/Sema/Sema.h @@ -9420,7 +9420,8 @@ public: /// /// Use this rather than examining the function's attributes yourself -- you /// will get it wrong. Returns CFT_Host if D is null. - CUDAFunctionTarget IdentifyCUDATarget(const FunctionDecl *D); + CUDAFunctionTarget IdentifyCUDATarget(const FunctionDecl *D, + bool IgnoreImplicitHDAttr = false); CUDAFunctionTarget IdentifyCUDATarget(const AttributeList *Attr); /// Gets the CUDA target for the current context. @@ -9522,7 +9523,10 @@ public: /// Check whether NewFD is a valid overload for CUDA. Emits /// diagnostics and invalidates NewFD if not. - void checkCUDATargetOverload(FunctionDecl *NewFD, LookupResult &Previous); + void checkCUDATargetOverload(FunctionDecl *NewFD, + const LookupResult &Previous); + /// Copies target attributes from the template TD to the function FD. + void inheritCUDATargetAttrs(FunctionDecl *FD, const FunctionTemplateDecl &TD); /// \name Code completion //@{ diff --git a/lib/Sema/SemaCUDA.cpp b/lib/Sema/SemaCUDA.cpp index 5e6d0e3e53..6f272ec839 100644 --- a/lib/Sema/SemaCUDA.cpp +++ b/lib/Sema/SemaCUDA.cpp @@ -93,8 +93,17 @@ Sema::CUDAFunctionTarget Sema::IdentifyCUDATarget(const AttributeList *Attr) { return CFT_Host; } +template +static bool hasAttr(const FunctionDecl *D, bool IgnoreImplicitAttr) { + return D->hasAttrs() && llvm::any_of(D->getAttrs(), [&](Attr *Attribute) { + return isa(Attribute) && + !(IgnoreImplicitAttr && Attribute->isImplicit()); + }); +} + /// IdentifyCUDATarget - Determine the CUDA compilation target for this function -Sema::CUDAFunctionTarget Sema::IdentifyCUDATarget(const FunctionDecl *D) { +Sema::CUDAFunctionTarget Sema::IdentifyCUDATarget(const FunctionDecl *D, + bool IgnoreImplicitHDAttr) { // Code that lives outside a function is run on the host. if (D == nullptr) return CFT_Host; @@ -105,13 +114,13 @@ Sema::CUDAFunctionTarget Sema::IdentifyCUDATarget(const FunctionDecl *D) { if (D->hasAttr()) return CFT_Global; - if (D->hasAttr()) { - if (D->hasAttr()) + if (hasAttr(D, IgnoreImplicitHDAttr)) { + if (hasAttr(D, IgnoreImplicitHDAttr)) return CFT_HostDevice; return CFT_Device; - } else if (D->hasAttr()) { + } else if (hasAttr(D, IgnoreImplicitHDAttr)) { return CFT_Host; - } else if (D->isImplicit()) { + } else if (D->isImplicit() && !IgnoreImplicitHDAttr) { // Some implicit declarations (like intrinsic functions) are not marked. // Set the most lenient target on them for maximal flexibility. return CFT_HostDevice; @@ -856,7 +865,7 @@ void Sema::CUDASetLambdaAttrs(CXXMethodDecl *Method) { } void Sema::checkCUDATargetOverload(FunctionDecl *NewFD, - LookupResult &Previous) { + const LookupResult &Previous) { assert(getLangOpts().CUDA && "Should only be called during CUDA compilation"); CUDAFunctionTarget NewTarget = IdentifyCUDATarget(NewFD); for (NamedDecl *OldND : Previous) { @@ -883,3 +892,21 @@ void Sema::checkCUDATargetOverload(FunctionDecl *NewFD, } } } + +template +static void copyAttrIfPresent(Sema &S, FunctionDecl *FD, + const FunctionDecl &TemplateFD) { + if (AttrTy *Attribute = TemplateFD.getAttr()) { + AttrTy *Clone = Attribute->clone(S.Context); + Clone->setInherited(true); + FD->addAttr(Clone); + } +} + +void Sema::inheritCUDATargetAttrs(FunctionDecl *FD, + const FunctionTemplateDecl &TD) { + const FunctionDecl &TemplateFD = *TD.getTemplatedDecl(); + copyAttrIfPresent(*this, FD, TemplateFD); + copyAttrIfPresent(*this, FD, TemplateFD); + copyAttrIfPresent(*this, FD, TemplateFD); +} diff --git a/lib/Sema/SemaDecl.cpp b/lib/Sema/SemaDecl.cpp index be2466c923..5d13c8fa03 100644 --- a/lib/Sema/SemaDecl.cpp +++ b/lib/Sema/SemaDecl.cpp @@ -8305,9 +8305,6 @@ Sema::ActOnFunctionDeclarator(Scope *S, Declarator &D, DeclContext *DC, // Handle attributes. ProcessDeclAttributes(S, NewFD, D); - if (getLangOpts().CUDA) - maybeAddCUDAHostDeviceAttrs(NewFD, Previous); - if (getLangOpts().OpenCL) { // OpenCL v1.1 s6.5: Using an address space qualifier in a function return // type declaration will generate a compilation error. @@ -8410,6 +8407,15 @@ Sema::ActOnFunctionDeclarator(Scope *S, Declarator &D, DeclContext *DC, TemplateArgs.setRAngleLoc(D.getIdentifierLoc()); } + // We do not add HD attributes to specializations here because + // they may have different constexpr-ness compared to their + // templates and, after maybeAddCUDAHostDeviceAttrs() is applied, + // may end up with different effective targets. Instead, a + // specialization inherits its target attributes from its template + // in the CheckFunctionTemplateSpecialization() call below. + if (getLangOpts().CUDA & !isFunctionTemplateSpecialization) + maybeAddCUDAHostDeviceAttrs(NewFD, Previous); + // If it's a friend (and only if it's a friend), it's possible // that either the specialized function type or the specialized // template is dependent, and therefore matching will fail. In diff --git a/lib/Sema/SemaTemplate.cpp b/lib/Sema/SemaTemplate.cpp index 898765cbd7..4846b25232 100644 --- a/lib/Sema/SemaTemplate.cpp +++ b/lib/Sema/SemaTemplate.cpp @@ -7043,13 +7043,15 @@ bool Sema::CheckFunctionTemplateSpecialization( continue; } - // Target attributes are part of function signature during cuda - // compilation, so deduced template must also have matching CUDA - // target. Given that regular template deduction does not take - // target attributes into account, we perform target match check - // here and reject candidates that have different target. + // Target attributes are part of the cuda function signature, so + // the deduced template's cuda target must match that of the + // specialization. Given that C++ template deduction does not + // take target attributes into account, we reject candidates + // here that have a different target. if (LangOpts.CUDA && - IdentifyCUDATarget(Specialization) != IdentifyCUDATarget(FD)) { + IdentifyCUDATarget(Specialization, + /* IgnoreImplicitHDAttributes = */ true) != + IdentifyCUDATarget(FD, /* IgnoreImplicitHDAttributes = */ true)) { FailedCandidates.addCandidate().set( I.getPair(), FunTmpl->getTemplatedDecl(), MakeDeductionFailureInfo(Context, TDK_CUDATargetMismatch, Info)); @@ -7166,6 +7168,14 @@ bool Sema::CheckFunctionTemplateSpecialization( SpecInfo->getTemplateSpecializationKind(), ExplicitTemplateArgs ? &ConvertedTemplateArgs[Specialization] : nullptr); + // A function template specialization inherits the target attributes + // of its template. (We require the attributes explicitly in the + // code to match, but a template may have implicit attributes by + // virtue e.g. of being constexpr, and it passes these implicit + // attributes on to its specializations.) + if (LangOpts.CUDA) + inheritCUDATargetAttrs(FD, *Specialization->getPrimaryTemplate()); + // The "previous declaration" for this function template specialization is // the prior function template specialization. Previous.clear(); @@ -8154,24 +8164,19 @@ DeclResult Sema::ActOnExplicitInstantiation(Scope *S, continue; } - // Target attributes are part of function signature during cuda - // compilation, so deduced template must also have matching CUDA - // target. Given that regular template deduction does not take it - // into account, we perform target match check here and reject - // candidates that have different target. - if (LangOpts.CUDA) { - CUDAFunctionTarget DeclaratorTarget = IdentifyCUDATarget(Attr); - // We need to adjust target when HD is forced by - // #pragma clang force_cuda_host_device - if (ForceCUDAHostDeviceDepth > 0 && - (DeclaratorTarget == CFT_Device || DeclaratorTarget == CFT_Host)) - DeclaratorTarget = CFT_HostDevice; - if (IdentifyCUDATarget(Specialization) != DeclaratorTarget) { - FailedCandidates.addCandidate().set( - P.getPair(), FunTmpl->getTemplatedDecl(), - MakeDeductionFailureInfo(Context, TDK_CUDATargetMismatch, Info)); - continue; - } + // Target attributes are part of the cuda function signature, so + // the cuda target of the instantiated function must match that of its + // template. Given that C++ template deduction does not take + // target attributes into account, we reject candidates here that + // have a different target. + if (LangOpts.CUDA && + IdentifyCUDATarget(Specialization, + /* IgnoreImplicitHDAttributes = */ true) != + IdentifyCUDATarget(Attr)) { + FailedCandidates.addCandidate().set( + P.getPair(), FunTmpl->getTemplatedDecl(), + MakeDeductionFailureInfo(Context, TDK_CUDATargetMismatch, Info)); + continue; } Matches.addDecl(Specialization, P.getAccess()); diff --git a/test/SemaCUDA/function-template-overload.cu b/test/SemaCUDA/function-template-overload.cu index 8adeb84957..56bba65395 100644 --- a/test/SemaCUDA/function-template-overload.cu +++ b/test/SemaCUDA/function-template-overload.cu @@ -31,7 +31,8 @@ template <> __device__ DType overload_h_d(long a); // OK. instantiates D template <> __host__ HType overload_h_d(long a); // OK. instantiates H -// Can't overload HD template with H or D template, though functions are OK. +// Can't overload HD template with H or D template, though +// non-template functions are OK. template __host__ __device__ HDType overload_hd(T a) { return HDType(); } // expected-note@-1 {{previous declaration is here}} // expected-note@-2 2 {{candidate template ignored: could not match 'HDType' against 'HType'}} @@ -56,24 +57,54 @@ template __host__ HType overload_h_d2(T a) { return HType(); } template __host__ __device__ HDType overload_h_d2(T a) { return HDType(); } template __device__ DType overload_h_d2(T1 a) { T1 x; T2 y; return DType(); } +// constexpr functions are implicitly HD, but explicit +// instantiation/specialization must use target attributes as written. +template constexpr T overload_ce_implicit_hd(T a) { return a+1; } +// expected-note@-1 3 {{candidate template ignored: target attributes do not match}} + +// These will not match the template. +template __host__ __device__ int overload_ce_implicit_hd(int a); +// expected-error@-1 {{explicit instantiation of 'overload_ce_implicit_hd' does not refer to a function template, variable template, member function, member class, or static data member}} +template <> __host__ __device__ long overload_ce_implicit_hd(long a); +// expected-error@-1 {{no function template matches function template specialization 'overload_ce_implicit_hd'}} +template <> __host__ __device__ constexpr long overload_ce_implicit_hd(long a); +// expected-error@-1 {{no function template matches function template specialization 'overload_ce_implicit_hd'}} + +// These should work, because template matching ignores the implicit +// HD attributes the compiler gives to constexpr functions/templates, +// so 'overload_ce_implicit_hd' template will match __host__ functions +// only. +template __host__ int overload_ce_implicit_hd(int a); +template <> __host__ long overload_ce_implicit_hd(long a); + +template float overload_ce_implicit_hd(float a); +template <> float* overload_ce_implicit_hd(float *a); +template <> constexpr double overload_ce_implicit_hd(double a) { return a + 3.0; }; + __host__ void hf() { overload_hd(13); + overload_ce_implicit_hd('h'); // Implicitly instantiated + overload_ce_implicit_hd(1.0f); // Explicitly instantiated + overload_ce_implicit_hd(2.0); // Explicitly specialized HType h = overload_h_d(10); HType h2i = overload_h_d2(11); HType h2ii = overload_h_d2(12); // These should be implicitly instantiated from __host__ template returning HType. - DType d = overload_h_d(20); // expected-error {{no viable conversion from 'HType' to 'DType'}} - DType d2i = overload_h_d2(21); // expected-error {{no viable conversion from 'HType' to 'DType'}} + DType d = overload_h_d(20); // expected-error {{no viable conversion from 'HType' to 'DType'}} + DType d2i = overload_h_d2(21); // expected-error {{no viable conversion from 'HType' to 'DType'}} DType d2ii = overload_h_d2(22); // expected-error {{no viable conversion from 'HType' to 'DType'}} } __device__ void df() { overload_hd(23); + overload_ce_implicit_hd('d'); // Implicitly instantiated + overload_ce_implicit_hd(1.0f); // Explicitly instantiated + overload_ce_implicit_hd(2.0); // Explicitly specialized // These should be implicitly instantiated from __device__ template returning DType. - HType h = overload_h_d(10); // expected-error {{no viable conversion from 'DType' to 'HType'}} - HType h2i = overload_h_d2(11); // expected-error {{no viable conversion from 'DType' to 'HType'}} + HType h = overload_h_d(10); // expected-error {{no viable conversion from 'DType' to 'HType'}} + HType h2i = overload_h_d2(11); // expected-error {{no viable conversion from 'DType' to 'HType'}} HType h2ii = overload_h_d2(12); // expected-error {{no viable conversion from 'DType' to 'HType'}} DType d = overload_h_d(20); -- 2.40.0