/// \brief Allocator for partial diagnostics.
PartialDiagnostic::StorageAllocator DiagAllocator;
- /// Diagnostics that are emitted if and only if the given function is
- /// codegen'ed. Access these through FunctionDecl::addDeferredDiag() and
- /// FunctionDecl::takeDeferredDiags().
- llvm::DenseMap<const FunctionDecl *, std::vector<PartialDiagnosticAt>>
- DeferredDiags;
-
/// \brief The current C++ ABI.
std::unique_ptr<CXXABI> ABI;
CXXABI *createCXXABI(const TargetInfo &T);
return DiagAllocator;
}
- decltype(DeferredDiags) &getDeferredDiags() { return DeferredDiags; }
- const decltype(DeferredDiags) &getDeferredDiags() const {
- return DeferredDiags;
- }
-
const TargetInfo &getTargetInfo() const { return *Target; }
const TargetInfo *getAuxTargetInfo() const { return AuxTarget; }
/// returns 0.
unsigned getMemoryFunctionKind() const;
- /// Add a diagnostic to be emitted if and when this function is codegen'ed.
- void addDeferredDiag(PartialDiagnosticAt PD);
-
- /// Gets this object's list of deferred diagnostics, if there are any.
- ///
- /// Although this is logically const, it clears our list of deferred diags.
- std::vector<PartialDiagnosticAt> takeDeferredDiags() const;
-
// Implement isa/cast/dyncast/etc.
static bool classof(const Decl *D) { return classofKind(D->getKind()); }
static bool classofKind(Kind K) {
/// before incrementing, so you can emit an error.
bool PopForceCUDAHostDevice();
+ /// Diagnostics that are emitted only if we discover that the given function
+ /// must be codegen'ed. Because handling these correctly adds overhead to
+ /// compilation, this is currently only enabled for CUDA compilations.
+ llvm::DenseMap<const FunctionDecl *, std::vector<PartialDiagnosticAt>>
+ CUDADeferredDiags;
+
+ /// Raw encodings of SourceLocations for which CheckCUDACall has emitted a
+ /// (maybe deferred) "bad call" diagnostic. We use this to avoid emitting the
+ /// same deferred diag twice.
+ llvm::DenseSet<unsigned> LocsWithCUDACallDiags;
+
+ /// The set of CUDA functions that, by tracing the call graph, we've
+ /// discovered must be emitted. Functions that we can tell a priori must be
+ /// emitted aren't added to this set.
+ llvm::DenseSet<FunctionDecl *> CUDAKnownEmittedFns;
+
+ /// A partial call graph maintained during CUDA compilation to support
+ /// deferred diagnostics. Specifically, functions are only added here if, at
+ /// the time they're added, they are not known-emitted. As soon as we
+ /// discover that a function is known-emitted, we remove it and everything it
+ /// transitively calls from this set and add those functions to
+ /// CUDAKnownEmittedFns.
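+ ///
+ /// For illustration, a hypothetical snapshot (names invented): if the HD
+ /// function hd1() calls hd2() and neither is yet known-emitted, this map
+ /// holds {hd1 -> {hd2}}. If we later see a __global__ kernel call hd1(),
+ /// hd1 becomes known-emitted, so we drain its entry, move hd1 and hd2 into
+ /// CUDAKnownEmittedFns, and emit any diagnostics deferred on them.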
+ llvm::DenseMap<FunctionDecl *, llvm::SetVector<FunctionDecl *>> CUDACallGraph;
+
/// Diagnostic builder for CUDA errors which may or may not be deferred.
///
/// In CUDA, there exist constructs (e.g. variable-length arrays, try/catch)
private:
struct PartialDiagnosticInfo {
- PartialDiagnosticInfo(SourceLocation Loc, PartialDiagnostic PD,
+ PartialDiagnosticInfo(Sema &S, SourceLocation Loc, PartialDiagnostic PD,
FunctionDecl *Fn)
- : Loc(Loc), PD(std::move(PD)), Fn(Fn) {}
+ : S(S), Loc(Loc), PD(std::move(PD)), Fn(Fn) {}
- ~PartialDiagnosticInfo() { Fn->addDeferredDiag({Loc, std::move(PD)}); }
+ ~PartialDiagnosticInfo() {
+ S.CUDADeferredDiags[Fn].push_back({Loc, std::move(PD)});
+ }
+ Sema &S;
SourceLocation Loc;
PartialDiagnostic PD;
FunctionDecl *Fn;
/// - If CurContext is a __device__ or __global__ function, emits the
/// diagnostics immediately.
/// - If CurContext is a __host__ __device__ function and we are compiling for
- /// the device, creates a deferred diagnostic which is emitted if and when
- /// the function is codegen'ed.
+ /// the device, creates a diagnostic which is emitted if and when we realize
+ /// that the function will be codegen'ed.
///
/// Example usage:
///
void maybeAddCUDAHostDeviceAttrs(Scope *S, FunctionDecl *FD,
const LookupResult &Previous);
-private:
- /// Raw encodings of SourceLocations for which CheckCUDACall has emitted a
- /// (maybe deferred) "bad call" diagnostic. We use this to avoid emitting the
- /// same deferred diag twice.
- llvm::DenseSet<unsigned> LocsWithCUDACallDiags;
-
public:
/// Check whether we're allowed to call Callee from the current context.
///
return 0;
}
-void FunctionDecl::addDeferredDiag(PartialDiagnosticAt PD) {
- getASTContext().getDeferredDiags()[this].push_back(std::move(PD));
-}
-
-std::vector<PartialDiagnosticAt> FunctionDecl::takeDeferredDiags() const {
- auto &DD = getASTContext().getDeferredDiags();
- auto It = DD.find(this);
- if (It == DD.end())
- return {};
- auto Ret = std::move(It->second);
- DD.erase(It);
- return Ret;
-}
-
//===----------------------------------------------------------------------===//
// FieldDecl Implementation
//===----------------------------------------------------------------------===//
EmitVersionIdentMetadata();
EmitTargetMetadata();
-
- // Emit any deferred diagnostics gathered during codegen. We didn't emit them
- // when we first discovered them because that would have halted codegen,
- // preventing us from gathering other deferred diags.
- for (const PartialDiagnosticAt &DiagAt : DeferredDiags) {
- SourceLocation Loc = DiagAt.first;
- const PartialDiagnostic &PD = DiagAt.second;
- DiagnosticBuilder Builder(getDiags().Report(Loc, PD.getDiagID()));
- PD.Emit(Builder);
- }
- // Clear the deferred diags so they don't outlive the ASTContext's
- // PartialDiagnostic allocator.
- DeferredDiags.clear();
}
void CodeGenModule::UpdateCompletedType(const TagDecl *TD) {
llvm::GlobalValue *GV) {
const auto *D = cast<FunctionDecl>(GD.getDecl());
- // Emit this function's deferred diagnostics, if none of them are errors. If
- // any of them are errors, don't codegen the function, but also don't emit any
- // of the diagnostics just yet. Emitting an error during codegen stops
- // further codegen, and we want to display as many deferred diags as possible.
- // We'll emit the now twice-deferred diags at the very end of codegen.
- //
- // (If a function has both error and non-error diags, we don't emit the
- // non-error diags here, because order can be significant, e.g. with notes
- // that follow errors.)
- auto Diags = D->takeDeferredDiags();
- if (auto *Templ = D->getPrimaryTemplate()) {
- auto TemplDiags = Templ->getAsFunction()->takeDeferredDiags();
- Diags.insert(Diags.end(), TemplDiags.begin(), TemplDiags.end());
- }
- bool HasError = llvm::any_of(Diags, [this](const PartialDiagnosticAt &PDAt) {
- return getDiags().getDiagnosticLevel(PDAt.second.getDiagID(), PDAt.first) >=
- DiagnosticsEngine::Error;
- });
- if (HasError) {
- DeferredDiags.insert(DeferredDiags.end(),
- std::make_move_iterator(Diags.begin()),
- std::make_move_iterator(Diags.end()));
- return;
- }
- for (PartialDiagnosticAt &PDAt : Diags) {
- const SourceLocation &Loc = PDAt.first;
- const PartialDiagnostic &PD = PDAt.second;
- DiagnosticBuilder Builder(getDiags().Report(Loc, PD.getDiagID()));
- PD.Emit(Builder);
- }
-
// Compute the function info and LLVM type.
const CGFunctionInfo &FI = getTypes().arrangeGlobalDeclaration(GD);
llvm::FunctionType *Ty = getTypes().GetFunctionType(FI);
/// MDNodes.
llvm::DenseMap<QualType, llvm::Metadata *> MetadataIdMap;
- /// Diags gathered from FunctionDecl::takeDeferredDiags(). Emitted at the
- /// very end of codegen.
- std::vector<std::pair<SourceLocation, PartialDiagnostic>> DeferredDiags;
-
public:
CodeGenModule(ASTContext &C, const HeaderSearchOptions &headersearchopts,
const PreprocessorOptions &ppopts,
break;
case K_Deferred:
assert(Fn && "Must have a function to attach the deferred diag to.");
- PartialDiagInfo.emplace(Loc, S.PDiag(DiagID), Fn);
+ PartialDiagInfo.emplace(S, Loc, S.PDiag(DiagID), Fn);
break;
}
}
+// In CUDA, there are some constructs which may appear in semantically-valid
+// code, but trigger errors if we ever generate code for the function in which
+// they appear. Essentially every construct you're not allowed to use on the
+// device falls into this category, because you are allowed to use these
+// constructs in a __host__ __device__ function, but only if that function is
+// never codegen'ed on the device.
+//
+// To handle semantic checking for these constructs, we keep track of the set of
+// functions we know will be emitted, either because we could tell a priori that
+// they would be emitted, or because they were transitively called by a
+// known-emitted function.
+//
+// We also keep a partial call graph of which not-known-emitted functions call
+// which other not-known-emitted functions.
+//
+// When we see something that is illegal if the current function is emitted
+// (usually by way of CUDADiagIfDeviceCode, CUDADiagIfHostCode, or
+// CheckCUDACall), we first check whether the current function is
+// known-emitted. If so, we emit the diagnostic immediately.
+//
+// Otherwise, we "defer" the diagnostic. It sits in Sema::CUDADeferredDiags
+// until we discover that the function is known-emitted, at which point we take
+// it out of this map and emit the diagnostic.
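+//
+// For example (hypothetical code, not from this patch):
+//
+//   __host__ __device__ void hd() { throw 0; }  // Illegal in device code.
+//   __global__ void kernel() { hd(); }
+//
+// When we check hd() itself, it is not yet known-emitted, so the error about
+// the throw is deferred. When we later see the call from kernel(), which is
+// always emitted in device compilation, hd() becomes known-emitted and the
+// deferred error is finally emitted.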
+
+// Do we know that we will eventually codegen the given function?
+static bool IsKnownEmitted(Sema &S, FunctionDecl *FD) {
+ // Templates are emitted when they're instantiated.
+ if (FD->isDependentContext())
+ return false;
+
+ // When compiling for device, host functions are never emitted. Similarly,
+ // when compiling for host, device and global functions are never emitted.
+ // (Technically, we do emit a host-side stub for global functions, but this
+ // doesn't count for our purposes here.)
+ Sema::CUDAFunctionTarget T = S.IdentifyCUDATarget(FD);
+ if (S.getLangOpts().CUDAIsDevice && T == Sema::CFT_Host)
+ return false;
+ if (!S.getLangOpts().CUDAIsDevice &&
+ (T == Sema::CFT_Device || T == Sema::CFT_Global))
+ return false;
+
+ // Externally visible and similar functions are always emitted.
+ if (S.getASTContext().GetGVALinkageForFunction(FD) > GVA_DiscardableODR)
+ return true;
+
+ // Otherwise, the function is known-emitted if it's in our set of
+ // known-emitted functions.
+ return S.CUDAKnownEmittedFns.count(FD) > 0;
+}
+
Sema::CUDADiagBuilder Sema::CUDADiagIfDeviceCode(SourceLocation Loc,
unsigned DiagID) {
assert(getLangOpts().CUDA && "Should only be called during CUDA compilation");
- CUDADiagBuilder::Kind DiagKind;
- switch (CurrentCUDATarget()) {
- case CFT_Global:
- case CFT_Device:
- DiagKind = CUDADiagBuilder::K_Immediate;
- break;
- case CFT_HostDevice:
- DiagKind = getLangOpts().CUDAIsDevice ? CUDADiagBuilder::K_Deferred
- : CUDADiagBuilder::K_Nop;
- break;
- default:
- DiagKind = CUDADiagBuilder::K_Nop;
- }
+ CUDADiagBuilder::Kind DiagKind = [&] {
+ switch (CurrentCUDATarget()) {
+ case CFT_Global:
+ case CFT_Device:
+ return CUDADiagBuilder::K_Immediate;
+ case CFT_HostDevice:
+ // An HD function counts as host code if we're compiling for host, and
+ // device code if we're compiling for device. Defer any errors in device
+ // mode until the function is known-emitted.
+ if (getLangOpts().CUDAIsDevice) {
+ return IsKnownEmitted(*this, dyn_cast<FunctionDecl>(CurContext))
+ ? CUDADiagBuilder::K_Immediate
+ : CUDADiagBuilder::K_Deferred;
+ }
+ return CUDADiagBuilder::K_Nop;
+
+ default:
+ return CUDADiagBuilder::K_Nop;
+ }
+ }();
return CUDADiagBuilder(DiagKind, Loc, DiagID,
dyn_cast<FunctionDecl>(CurContext), *this);
}
Sema::CUDADiagBuilder Sema::CUDADiagIfHostCode(SourceLocation Loc,
unsigned DiagID) {
assert(getLangOpts().CUDA && "Should only be called during CUDA compilation");
- CUDADiagBuilder::Kind DiagKind;
- switch (CurrentCUDATarget()) {
- case CFT_Host:
- DiagKind = CUDADiagBuilder::K_Immediate;
- break;
- case CFT_HostDevice:
- DiagKind = getLangOpts().CUDAIsDevice ? CUDADiagBuilder::K_Nop
- : CUDADiagBuilder::K_Deferred;
- break;
- default:
- DiagKind = CUDADiagBuilder::K_Nop;
- }
+ CUDADiagBuilder::Kind DiagKind = [&] {
+ switch (CurrentCUDATarget()) {
+ case CFT_Host:
+ return CUDADiagBuilder::K_Immediate;
+ case CFT_HostDevice:
+ // An HD function counts as host code if we're compiling for host, and
+ // device code if we're compiling for device. Defer any errors in host
+ // mode until the function is known-emitted.
+ if (getLangOpts().CUDAIsDevice)
+ return CUDADiagBuilder::K_Nop;
+
+ return IsKnownEmitted(*this, dyn_cast<FunctionDecl>(CurContext))
+ ? CUDADiagBuilder::K_Immediate
+ : CUDADiagBuilder::K_Deferred;
+ default:
+ return CUDADiagBuilder::K_Nop;
+ }
+ }();
return CUDADiagBuilder(DiagKind, Loc, DiagID,
dyn_cast<FunctionDecl>(CurContext), *this);
}
+// Emit any deferred diagnostics for FD and erase them from the map in which
+// they're stored.
+static void EmitDeferredDiags(Sema &S, FunctionDecl *FD) {
+ auto It = S.CUDADeferredDiags.find(FD);
+ if (It == S.CUDADeferredDiags.end())
+ return;
+ for (PartialDiagnosticAt &PDAt : It->second) {
+ const SourceLocation &Loc = PDAt.first;
+ const PartialDiagnostic &PD = PDAt.second;
+ DiagnosticBuilder Builder(S.Diags.Report(Loc, PD.getDiagID()));
+ Builder.setForceEmit();
+ PD.Emit(Builder);
+ }
+ S.CUDADeferredDiags.erase(It);
+}
+
+// Indicate that this function (and thus everything it transitively calls) will
+// be codegen'ed, and emit any deferred diagnostics on this function and its
+// (transitive) callees.
+static void MarkKnownEmitted(Sema &S, FunctionDecl *FD) {
+ // Nothing to do if we already know that FD is emitted.
+ if (IsKnownEmitted(S, FD)) {
+ assert(!S.CUDACallGraph.count(FD));
+ return;
+ }
+
+ // We've just discovered that FD is known-emitted. Walk our call graph to
+ // discover what else must now be emitted.
+ llvm::SmallVector<FunctionDecl *, 4> Worklist = {FD};
+ llvm::SmallSet<FunctionDecl *, 4> Seen;
+ Seen.insert(FD);
+ while (!Worklist.empty()) {
+ FunctionDecl *Caller = Worklist.pop_back_val();
+ assert(!IsKnownEmitted(S, Caller) &&
+ "Worklist should not contain known-emitted functions.");
+ S.CUDAKnownEmittedFns.insert(Caller);
+ EmitDeferredDiags(S, Caller);
+
+ // Deferred diags are often recorded on the template itself, so emit those
+ // as well.
+ if (auto *Templ = Caller->getPrimaryTemplate())
+ EmitDeferredDiags(S, Templ->getAsFunction());
+
+ // Add all functions called by Caller to our worklist.
+ auto CGIt = S.CUDACallGraph.find(Caller);
+ if (CGIt == S.CUDACallGraph.end())
+ continue;
+
+ for (FunctionDecl *Callee : CGIt->second) {
+ if (Seen.count(Callee) || IsKnownEmitted(S, Callee))
+ continue;
+ Seen.insert(Callee);
+ Worklist.push_back(Callee);
+ }
+
+ // Caller is now known-emitted, so we no longer need to maintain its list of
+ // callees in CUDACallGraph.
+ S.CUDACallGraph.erase(CGIt);
+ }
+}
+
bool Sema::CheckCUDACall(SourceLocation Loc, FunctionDecl *Callee) {
assert(getLangOpts().CUDA && "Should only be called during CUDA compilation");
assert(Callee && "Callee may not be null.");
+ // FIXME: Is bailing out early correct here? Should we instead assume that
+ // the caller is a global initializer?
FunctionDecl *Caller = dyn_cast<FunctionDecl>(CurContext);
if (!Caller)
return true;
- CUDADiagBuilder::Kind DiagKind;
- switch (IdentifyCUDAPreference(Caller, Callee)) {
- case CFP_Never:
- DiagKind = CUDADiagBuilder::K_Immediate;
- break;
- case CFP_WrongSide:
- assert(Caller && "WrongSide calls require a non-null caller");
- DiagKind = CUDADiagBuilder::K_Deferred;
- break;
- default:
- DiagKind = CUDADiagBuilder::K_Nop;
- }
+ bool CallerKnownEmitted = IsKnownEmitted(*this, Caller);
+ if (CallerKnownEmitted)
+ MarkKnownEmitted(*this, Callee);
+ else
+ CUDACallGraph[Caller].insert(Callee);
+
+ CUDADiagBuilder::Kind DiagKind = [&] {
+ switch (IdentifyCUDAPreference(Caller, Callee)) {
+ case CFP_Never:
+ return CUDADiagBuilder::K_Immediate;
+ case CFP_WrongSide:
+ assert(Caller && "WrongSide calls require a non-null caller");
+ // If we know the caller will be emitted, we know this wrong-side call
+ // will be emitted, so it's an immediate error. Otherwise, defer the
+ // error until we know the caller is emitted.
+ return CallerKnownEmitted ? CUDADiagBuilder::K_Immediate
+ : CUDADiagBuilder::K_Deferred;
+ default:
+ return CUDADiagBuilder::K_Nop;
+ }
+ }();
// Avoid emitting this error twice for the same location. Using a hashtable
// like this is unfortunate, but because we must continue parsing as normal
// RUN: %clang_cc1 -std=c++11 -fsyntax-only -fcuda-is-device -verify %s
__attribute__((device)) void device_fn() {}
-__attribute__((device)) void hd_fn() {}
+__attribute__((host, device)) void hd_fn() {}
__attribute__((device)) void device_attr() {
([]() __attribute__((device)) { device_fn(); })();
-// RUN: %clang_cc1 %s --std=c++11 -triple nvptx-unknown-unknown -fcuda-is-device -emit-llvm -o - -verify
+// RUN: %clang_cc1 %s --std=c++11 -triple nvptx-unknown-unknown -fcuda-is-device \
+// RUN: -emit-llvm -o /dev/null -verify
// Note: This test won't work with -fsyntax-only, because some of these errors
// are emitted during codegen.
DeviceReturnTy ret_d = d();
DeviceFnPtr fp_cd = cd;
DeviceReturnTy ret_cd = cd();
+#if !defined(__CUDA_ARCH__)
+ // expected-error@-5 {{reference to __device__ function 'd' in __host__ __device__ function}}
+ // expected-error@-5 {{reference to __device__ function 'd' in __host__ __device__ function}}
+ // expected-error@-5 {{reference to __device__ function 'cd' in __host__ __device__ function}}
+ // expected-error@-5 {{reference to __device__ function 'cd' in __host__ __device__ function}}
+#endif
HostFnPtr fp_h = h;
HostReturnTy ret_h = h();
HostFnPtr fp_ch = ch;
HostReturnTy ret_ch = ch();
+#if defined(__CUDA_ARCH__)
+ // expected-error@-5 {{reference to __host__ function 'h' in __host__ __device__ function}}
+ // expected-error@-5 {{reference to __host__ function 'h' in __host__ __device__ function}}
+ // expected-error@-5 {{reference to __host__ function 'ch' in __host__ __device__ function}}
+ // expected-error@-5 {{reference to __host__ function 'ch' in __host__ __device__ function}}
+#endif
CurrentFnPtr fp_dh = dh;
CurrentReturnTy ret_dh = dh();
CurrentFnPtr fp_cdh = cdh;
CurrentReturnTy ret_cdh = cdh();
- g(); // expected-error {{call to global function g not configured}}
+ g();
+#if defined (__CUDA_ARCH__)
+ // expected-error@-2 {{reference to __global__ function 'g' in __host__ __device__ function}}
+#else
+ // expected-error@-4 {{call to global function g not configured}}
+#endif
}
// Test for address of overloaded function resolution in the global context.
// If we have a mix of HD and H-only or D-only candidates in the overload set,
// normal C++ overload resolution rules apply first.
-template <typename T> TemplateReturnTy template_vs_hd_function(T arg) {
+template <typename T> TemplateReturnTy template_vs_hd_function(T arg)
+#ifdef __CUDA_ARCH__
+//expected-note@-2 {{declared here}}
+#endif
+{
return TemplateReturnTy();
}
__host__ __device__ HostDeviceReturnTy template_vs_hd_function(float arg) {
__host__ __device__ void test_host_device_calls_hd_template() {
HostDeviceReturnTy ret1 = template_vs_hd_function(1.0f);
TemplateReturnTy ret2 = template_vs_hd_function(1);
+#ifdef __CUDA_ARCH__
+ // expected-error@-2 {{reference to __host__ function 'template_vs_hd_function<int>' in __host__ __device__ function}}
+#endif
}
__host__ void test_host_calls_hd_template() {
// side of compilation.
__device__ DeviceReturnTy device_only_function(int arg) { return DeviceReturnTy(); }
__device__ DeviceReturnTy2 device_only_function(float arg) { return DeviceReturnTy2(); }
+#ifndef __CUDA_ARCH__
+ // expected-note@-3 {{'device_only_function' declared here}}
+ // expected-note@-3 {{'device_only_function' declared here}}
+#endif
__host__ HostReturnTy host_only_function(int arg) { return HostReturnTy(); }
__host__ HostReturnTy2 host_only_function(float arg) { return HostReturnTy2(); }
+#ifdef __CUDA_ARCH__
+ // expected-note@-3 {{'host_only_function' declared here}}
+ // expected-note@-3 {{'host_only_function' declared here}}
+#endif
__host__ __device__ void test_host_device_single_side_overloading() {
DeviceReturnTy ret1 = device_only_function(1);
DeviceReturnTy2 ret2 = device_only_function(1.0f);
+#ifndef __CUDA_ARCH__
+ // expected-error@-3 {{reference to __device__ function 'device_only_function' in __host__ __device__ function}}
+ // expected-error@-3 {{reference to __device__ function 'device_only_function' in __host__ __device__ function}}
+#endif
HostReturnTy ret3 = host_only_function(1);
HostReturnTy2 ret4 = host_only_function(1.0f);
+#ifdef __CUDA_ARCH__
+ // expected-error@-3 {{reference to __host__ function 'host_only_function' in __host__ __device__ function}}
+ // expected-error@-3 {{reference to __host__ function 'host_only_function' in __host__ __device__ function}}
+#endif
}
// Verify that we allow overloading function templates.
// Test 3: device method called from host function
struct S3 {
- __device__ void method() {} // expected-note {{'method' declared here}};
+ __device__ void method() {} // expected-note {{'method' declared here}}
};
void foo3(S3& s) {
// Test 4: device method called from host&device function
struct S4 {
- __device__ void method() {}
+ __device__ void method() {} // expected-note {{'method' declared here}}
};
__host__ __device__ void foo4(S4& s) {
- s.method();
+ s.method(); // expected-error {{reference to __device__ function 'method' in __host__ __device__ function}}
}
//------------------------------------------------------------------------------
__host__ __device__ fn_ptr_t get_ptr_hd() {
return kernel;
#ifdef DEVICE
- // This emits a deferred error on the device, but we don't catch it in this
- // file because the non-deferred error below precludes this.
-
- // FIXME-expected-error@-2 {{reference to __global__ function}}
+ // expected-error@-2 {{reference to __global__ function}}
#endif
}
__host__ fn_ptr_t get_ptr_h() {