[CUDA] Ignore implicit target attributes during function template instantiation.

author Artem Belevich <tra@google.com>

Thu, 8 Dec 2016 19:38:13 +0000 (19:38 +0000)

committer Artem Belevich <tra@google.com>

Thu, 8 Dec 2016 19:38:13 +0000 (19:38 +0000)
author Artem Belevich <tra@google.com>
Thu, 8 Dec 2016 19:38:13 +0000 (19:38 +0000)
committer Artem Belevich <tra@google.com>
Thu, 8 Dec 2016 19:38:13 +0000 (19:38 +0000)
diff --git a/include/clang/Sema/Sema.h b/include/clang/Sema/Sema.h

index fcc0d53b2d4f293d49cbfdf1cbeb2987560ad5b1..0523282e0a3a176dfcd537233c30407ccd4c9ed7 100644 (file)
--- a/include/clang/Sema/Sema.h
+++ b/include/clang/Sema/Sema.h
@@ -9420,7 +9420,8 @@ public:
    ///
    /// Use this rather than examining the function's attributes yourself -- you
    /// will get it wrong.  Returns CFT_Host if D is null.
-  CUDAFunctionTarget IdentifyCUDATarget(const FunctionDecl *D);
+  CUDAFunctionTarget IdentifyCUDATarget(const FunctionDecl *D,
+                                        bool IgnoreImplicitHDAttr = false);
    CUDAFunctionTarget IdentifyCUDATarget(const AttributeList *Attr);
  
    /// Gets the CUDA target for the current context.
@@ -9522,7 +9523,10 @@ public:
  
    /// Check whether NewFD is a valid overload for CUDA. Emits
    /// diagnostics and invalidates NewFD if not.
-  void checkCUDATargetOverload(FunctionDecl *NewFD, LookupResult &Previous);
+  void checkCUDATargetOverload(FunctionDecl *NewFD,
+                               const LookupResult &Previous);
+  /// Copies target attributes from the template TD to the function FD.
+  void inheritCUDATargetAttrs(FunctionDecl *FD, const FunctionTemplateDecl &TD);
  
    /// \name Code completion
    //@{
diff --git a/lib/Sema/SemaCUDA.cpp b/lib/Sema/SemaCUDA.cpp

index 5e6d0e3e53b4c14943779869f1f92f55b63c49f5..6f272ec839f5330ede8dd59ab338e3f8f54fe9f3 100644 (file)
--- a/lib/Sema/SemaCUDA.cpp
+++ b/lib/Sema/SemaCUDA.cpp
@@ -93,8 +93,17 @@ Sema::CUDAFunctionTarget Sema::IdentifyCUDATarget(const AttributeList *Attr) {
    return CFT_Host;
  }
  
+template <typename A>
+static bool hasAttr(const FunctionDecl *D, bool IgnoreImplicitAttr) {
+  return D->hasAttrs() && llvm::any_of(D->getAttrs(), [&](Attr *Attribute) {
+           return isa<A>(Attribute) &&
+                  !(IgnoreImplicitAttr && Attribute->isImplicit());
+         });
+}
+
  /// IdentifyCUDATarget - Determine the CUDA compilation target for this function
-Sema::CUDAFunctionTarget Sema::IdentifyCUDATarget(const FunctionDecl *D) {
+Sema::CUDAFunctionTarget Sema::IdentifyCUDATarget(const FunctionDecl *D,
+                                                  bool IgnoreImplicitHDAttr) {
    // Code that lives outside a function is run on the host.
    if (D == nullptr)
      return CFT_Host;
@@ -105,13 +114,13 @@ Sema::CUDAFunctionTarget Sema::IdentifyCUDATarget(const FunctionDecl *D) {
    if (D->hasAttr<CUDAGlobalAttr>())
      return CFT_Global;
  
-  if (D->hasAttr<CUDADeviceAttr>()) {
-    if (D->hasAttr<CUDAHostAttr>())
+  if (hasAttr<CUDADeviceAttr>(D, IgnoreImplicitHDAttr)) {
+    if (hasAttr<CUDAHostAttr>(D, IgnoreImplicitHDAttr))
        return CFT_HostDevice;
      return CFT_Device;
-  } else if (D->hasAttr<CUDAHostAttr>()) {
+  } else if (hasAttr<CUDAHostAttr>(D, IgnoreImplicitHDAttr)) {
      return CFT_Host;
-  } else if (D->isImplicit()) {
+  } else if (D->isImplicit() && !IgnoreImplicitHDAttr) {
      // Some implicit declarations (like intrinsic functions) are not marked.
      // Set the most lenient target on them for maximal flexibility.
      return CFT_HostDevice;
@@ -856,7 +865,7 @@ void Sema::CUDASetLambdaAttrs(CXXMethodDecl *Method) {
  }
  
  void Sema::checkCUDATargetOverload(FunctionDecl *NewFD,
-                                   LookupResult &Previous) {
+                                   const LookupResult &Previous) {
    assert(getLangOpts().CUDA && "Should only be called during CUDA compilation");
    CUDAFunctionTarget NewTarget = IdentifyCUDATarget(NewFD);
    for (NamedDecl *OldND : Previous) {
@@ -883,3 +892,21 @@ void Sema::checkCUDATargetOverload(FunctionDecl *NewFD,
      }
    }
  }
+
+template <typename AttrTy>
+static void copyAttrIfPresent(Sema &S, FunctionDecl *FD,
+                              const FunctionDecl &TemplateFD) {
+  if (AttrTy *Attribute = TemplateFD.getAttr<AttrTy>()) {
+    AttrTy *Clone = Attribute->clone(S.Context);
+    Clone->setInherited(true);
+    FD->addAttr(Clone);
+  }
+}
+
+void Sema::inheritCUDATargetAttrs(FunctionDecl *FD,
+                                  const FunctionTemplateDecl &TD) {
+  const FunctionDecl &TemplateFD = *TD.getTemplatedDecl();
+  copyAttrIfPresent<CUDAGlobalAttr>(*this, FD, TemplateFD);
+  copyAttrIfPresent<CUDAHostAttr>(*this, FD, TemplateFD);
+  copyAttrIfPresent<CUDADeviceAttr>(*this, FD, TemplateFD);
+}
diff --git a/lib/Sema/SemaDecl.cpp b/lib/Sema/SemaDecl.cpp

index be2466c9235e97f674c39d4629d9381dc5a8d506..5d13c8fa0399fe931b119e45f77992060bc1a50e 100644 (file)
--- a/lib/Sema/SemaDecl.cpp
+++ b/lib/Sema/SemaDecl.cpp
@@ -8305,9 +8305,6 @@ Sema::ActOnFunctionDeclarator(Scope *S, Declarator &D, DeclContext *DC,
    // Handle attributes.
    ProcessDeclAttributes(S, NewFD, D);
  
-  if (getLangOpts().CUDA)
-    maybeAddCUDAHostDeviceAttrs(NewFD, Previous);
-
    if (getLangOpts().OpenCL) {
      // OpenCL v1.1 s6.5: Using an address space qualifier in a function return
      // type declaration will generate a compilation error.
@@ -8410,6 +8407,15 @@ Sema::ActOnFunctionDeclarator(Scope *S, Declarator &D, DeclContext *DC,
        TemplateArgs.setRAngleLoc(D.getIdentifierLoc());
      }
  
+    // We do not add HD attributes to specializations here because
+    // they may have different constexpr-ness compared to their
+    // templates and, after maybeAddCUDAHostDeviceAttrs() is applied,
+    // may end up with different effective targets. Instead, a
+    // specialization inherits its target attributes from its template
+    // in the CheckFunctionTemplateSpecialization() call below.
+    if (getLangOpts().CUDA & !isFunctionTemplateSpecialization)
+      maybeAddCUDAHostDeviceAttrs(NewFD, Previous);
+
      // If it's a friend (and only if it's a friend), it's possible
      // that either the specialized function type or the specialized
      // template is dependent, and therefore matching will fail.  In
diff --git a/lib/Sema/SemaTemplate.cpp b/lib/Sema/SemaTemplate.cpp

index 898765cbd79940e9bb93465731217a031bf9bee4..4846b25232a489916a37736f5e0344d2b920f8e6 100644 (file)
--- a/lib/Sema/SemaTemplate.cpp
+++ b/lib/Sema/SemaTemplate.cpp
@@ -7043,13 +7043,15 @@ bool Sema::CheckFunctionTemplateSpecialization(
          continue;
        }
  
-      // Target attributes are part of function signature during cuda
-      // compilation, so deduced template must also have matching CUDA
-      // target. Given that regular template deduction does not take
-      // target attributes into account, we perform target match check
-      // here and reject candidates that have different target.
+      // Target attributes are part of the cuda function signature, so
+      // the deduced template's cuda target must match that of the
+      // specialization.  Given that C++ template deduction does not
+      // take target attributes into account, we reject candidates
+      // here that have a different target.
        if (LangOpts.CUDA &&
-          IdentifyCUDATarget(Specialization) != IdentifyCUDATarget(FD)) {
+          IdentifyCUDATarget(Specialization,
+                             /* IgnoreImplicitHDAttributes = */ true) !=
+              IdentifyCUDATarget(FD, /* IgnoreImplicitHDAttributes = */ true)) {
          FailedCandidates.addCandidate().set(
              I.getPair(), FunTmpl->getTemplatedDecl(),
              MakeDeductionFailureInfo(Context, TDK_CUDATargetMismatch, Info));
@@ -7166,6 +7168,14 @@ bool Sema::CheckFunctionTemplateSpecialization(
        SpecInfo->getTemplateSpecializationKind(),
        ExplicitTemplateArgs ? &ConvertedTemplateArgs[Specialization] : nullptr);
  
+  // A function template specialization inherits the target attributes
+  // of its template.  (We require the attributes explicitly in the
+  // code to match, but a template may have implicit attributes by
+  // virtue e.g. of being constexpr, and it passes these implicit
+  // attributes on to its specializations.)
+  if (LangOpts.CUDA)
+    inheritCUDATargetAttrs(FD, *Specialization->getPrimaryTemplate());
+
    // The "previous declaration" for this function template specialization is
    // the prior function template specialization.
    Previous.clear();
@@ -8154,24 +8164,19 @@ DeclResult Sema::ActOnExplicitInstantiation(Scope *S,
        continue;
      }
  
-    // Target attributes are part of function signature during cuda
-    // compilation, so deduced template must also have matching CUDA
-    // target. Given that regular template deduction does not take it
-    // into account, we perform target match check here and reject
-    // candidates that have different target.
-    if (LangOpts.CUDA) {
-      CUDAFunctionTarget DeclaratorTarget = IdentifyCUDATarget(Attr);
-      // We need to adjust target when HD is forced by
-      // #pragma clang force_cuda_host_device
-      if (ForceCUDAHostDeviceDepth > 0 &&
-          (DeclaratorTarget == CFT_Device || DeclaratorTarget == CFT_Host))
-        DeclaratorTarget = CFT_HostDevice;
-      if (IdentifyCUDATarget(Specialization) != DeclaratorTarget) {
-        FailedCandidates.addCandidate().set(
-            P.getPair(), FunTmpl->getTemplatedDecl(),
-            MakeDeductionFailureInfo(Context, TDK_CUDATargetMismatch, Info));
-        continue;
-      }
+    // Target attributes are part of the cuda function signature, so
+    // the cuda target of the instantiated function must match that of its
+    // template.  Given that C++ template deduction does not take
+    // target attributes into account, we reject candidates here that
+    // have a different target.
+    if (LangOpts.CUDA &&
+        IdentifyCUDATarget(Specialization,
+                           /* IgnoreImplicitHDAttributes = */ true) !=
+            IdentifyCUDATarget(Attr)) {
+      FailedCandidates.addCandidate().set(
+          P.getPair(), FunTmpl->getTemplatedDecl(),
+          MakeDeductionFailureInfo(Context, TDK_CUDATargetMismatch, Info));
+      continue;
      }
  
      Matches.addDecl(Specialization, P.getAccess());
diff --git a/test/SemaCUDA/function-template-overload.cu b/test/SemaCUDA/function-template-overload.cu

index 8adeb849570b983fd91c1dcc0b4584ce1ce52e37..56bba653958c14ef7224b2904067e838044c05c1 100644 (file)
--- a/test/SemaCUDA/function-template-overload.cu
+++ b/test/SemaCUDA/function-template-overload.cu
@@ -31,7 +31,8 @@ template  <> __device__ DType overload_h_d(long a); // OK. instantiates D
  template  <> __host__ HType overload_h_d(long a); // OK. instantiates H
  
  
-// Can't overload HD template with H or D template, though functions are OK.
+// Can't overload HD template with H or D template, though
+// non-template functions are OK.
  template <typename T> __host__ __device__ HDType overload_hd(T a) { return HDType(); }
  // expected-note@-1 {{previous declaration is here}}
  // expected-note@-2 2 {{candidate template ignored: could not match 'HDType' against 'HType'}}
@@ -56,24 +57,54 @@ template <typename T> __host__ HType overload_h_d2(T a) { return HType(); }
  template <typename T> __host__ __device__ HDType overload_h_d2(T a) { return HDType(); }
  template <typename T1, typename T2 = int> __device__ DType overload_h_d2(T1 a) { T1 x; T2 y; return DType(); }
  
+// constexpr functions are implicitly HD, but explicit
+// instantiation/specialization must use target attributes as written.
+template <typename T> constexpr T overload_ce_implicit_hd(T a) { return a+1; }
+// expected-note@-1 3 {{candidate template ignored: target attributes do not match}}
+
+// These will not match the template.
+template __host__ __device__ int overload_ce_implicit_hd(int a);
+// expected-error@-1 {{explicit instantiation of 'overload_ce_implicit_hd' does not refer to a function template, variable template, member function, member class, or static data member}}
+template <> __host__ __device__ long overload_ce_implicit_hd(long a);
+// expected-error@-1 {{no function template matches function template specialization 'overload_ce_implicit_hd'}}
+template <> __host__ __device__ constexpr long overload_ce_implicit_hd(long a);
+// expected-error@-1 {{no function template matches function template specialization 'overload_ce_implicit_hd'}}
+
+// These should work, because template matching ignores the implicit
+// HD attributes the compiler gives to constexpr functions/templates,
+// so 'overload_ce_implicit_hd' template will match __host__ functions
+// only.
+template __host__ int overload_ce_implicit_hd(int a);
+template <> __host__ long overload_ce_implicit_hd(long a);
+
+template float overload_ce_implicit_hd(float a);
+template <> float* overload_ce_implicit_hd(float *a);
+template <> constexpr double overload_ce_implicit_hd(double a) { return a + 3.0; };
+
  __host__ void hf() {
    overload_hd(13);
+  overload_ce_implicit_hd('h');        // Implicitly instantiated
+  overload_ce_implicit_hd(1.0f);       // Explicitly instantiated
+  overload_ce_implicit_hd(2.0);        // Explicitly specialized
  
    HType h = overload_h_d(10);
    HType h2i = overload_h_d2<int>(11);
    HType h2ii = overload_h_d2<int>(12);
  
    // These should be implicitly instantiated from __host__ template returning HType.
-  DType d = overload_h_d(20); // expected-error {{no viable conversion from 'HType' to 'DType'}}
-  DType d2i = overload_h_d2<int>(21); // expected-error {{no viable conversion from 'HType' to 'DType'}}
+  DType d = overload_h_d(20);          // expected-error {{no viable conversion from 'HType' to 'DType'}}
+  DType d2i = overload_h_d2<int>(21);  // expected-error {{no viable conversion from 'HType' to 'DType'}}
    DType d2ii = overload_h_d2<int>(22); // expected-error {{no viable conversion from 'HType' to 'DType'}}
  }
  __device__ void df() {
    overload_hd(23);
+  overload_ce_implicit_hd('d');        // Implicitly instantiated
+  overload_ce_implicit_hd(1.0f);       // Explicitly instantiated
+  overload_ce_implicit_hd(2.0);        // Explicitly specialized
  
    // These should be implicitly instantiated from __device__ template returning DType.
-  HType h = overload_h_d(10); // expected-error {{no viable conversion from 'DType' to 'HType'}}
-  HType h2i = overload_h_d2<int>(11); // expected-error {{no viable conversion from 'DType' to 'HType'}}
+  HType h = overload_h_d(10);          // expected-error {{no viable conversion from 'DType' to 'HType'}}
+  HType h2i = overload_h_d2<int>(11);  // expected-error {{no viable conversion from 'DType' to 'HType'}}
    HType h2ii = overload_h_d2<int>(12); // expected-error {{no viable conversion from 'DType' to 'HType'}}
  
    DType d = overload_h_d(20);
author	Artem Belevich <tra@google.com>
	Thu, 8 Dec 2016 19:38:13 +0000 (19:38 +0000)
committer	Artem Belevich <tra@google.com>
	Thu, 8 Dec 2016 19:38:13 +0000 (19:38 +0000)
include/clang/Sema/Sema.h		patch \| blob \| history
lib/Sema/SemaCUDA.cpp		patch \| blob \| history
lib/Sema/SemaDecl.cpp		patch \| blob \| history
lib/Sema/SemaTemplate.cpp		patch \| blob \| history
test/SemaCUDA/function-template-overload.cu		patch \| blob \| history