From: Justin Lebar <jlebar@google.com>
Date: Mon, 15 Aug 2016 23:00:49 +0000 (+0000)
Subject: [CUDA] Raise an error if a wrong-side call is codegen'ed.
X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=2be279f314d7f2a45d0266d4103f520deb25f901;p=clang

[CUDA] Raise an error if a wrong-side call is codegen'ed.

Summary:
Some function calls in CUDA are allowed to appear in
semantically-correct programs but are an error if they're ever
codegen'ed.  Specifically, a host+device function may call a host
function, but it's an error if such a function is ever codegen'ed in
device mode (and vice versa).

Previously, clang made no attempt to catch these errors.  For the most
part, they would be caught by ptxas, and reported as "call to unknown
function 'foo'".

Now we catch these errors and report them the same as we report other
illegal calls (e.g. a call from a host function to a device function).

This has a small change in error-message behavior for calls that were
previously disallowed (e.g. calls from a host to a device function).
Previously, we'd catch disallowed calls fairly early, before doing
additional semantic checking e.g. of the call's arguments.  Now we catch
these illegal calls at the very end of our semantic checks, so we'll
only emit a "illegal CUDA call" error if the call is otherwise
well-formed.

Reviewers: tra, rnk

Subscribers: cfe-commits

Differential Revision: https://reviews.llvm.org/D23242

git-svn-id: https://llvm.org/svn/llvm-project/cfe/trunk@278759 91177308-0d34-0410-b5e6-96231b3b80d8
---

diff --git a/include/clang/Sema/Sema.h b/include/clang/Sema/Sema.h
index 1a50030296..e0932d0f25 100644
--- a/include/clang/Sema/Sema.h
+++ b/include/clang/Sema/Sema.h
@@ -9186,6 +9186,18 @@ public:
   void maybeAddCUDAHostDeviceAttrs(Scope *S, FunctionDecl *FD,
                                    const LookupResult &Previous);
 
+  /// Check whether we're allowed to call Callee from the current context.
+  ///
+  /// If the call is never allowed in a semantically-correct program
+  /// (CFP_Never), emits an error and returns false.
+  ///
+  /// If the call is allowed in semantically-correct programs, but only if it's
+  /// never codegen'ed (CFP_WrongSide), creates a deferred diagnostic to be
+  /// emitted if and when the caller is codegen'ed, and returns true.
+  ///
+  /// Otherwise, returns true without emitting any diagnostics.
+  bool CheckCUDACall(SourceLocation Loc, FunctionDecl *Callee);
+
   /// Finds a function in \p Matches with highest calling priority
   /// from \p Caller context and erases all functions with lower
   /// calling priority.
diff --git a/lib/Sema/SemaCUDA.cpp b/lib/Sema/SemaCUDA.cpp
index 90af6d5a92..4f370d389c 100644
--- a/lib/Sema/SemaCUDA.cpp
+++ b/lib/Sema/SemaCUDA.cpp
@@ -480,3 +480,33 @@ void Sema::maybeAddCUDAHostDeviceAttrs(Scope *S, FunctionDecl *NewD,
   NewD->addAttr(CUDAHostAttr::CreateImplicit(Context));
   NewD->addAttr(CUDADeviceAttr::CreateImplicit(Context));
 }
+
+bool Sema::CheckCUDACall(SourceLocation Loc, FunctionDecl *Callee) {
+  assert(getLangOpts().CUDA &&
+         "Should only be called during CUDA compilation.");
+  assert(Callee && "Callee may not be null.");
+  FunctionDecl *Caller = dyn_cast<FunctionDecl>(CurContext);
+  if (!Caller)
+    return true;
+
+  Sema::CUDAFunctionPreference Pref = IdentifyCUDAPreference(Caller, Callee);
+  if (Pref == Sema::CFP_Never) {
+    Diag(Loc, diag::err_ref_bad_target) << IdentifyCUDATarget(Callee) << Callee
+                                        << IdentifyCUDATarget(Caller);
+    Diag(Callee->getLocation(), diag::note_previous_decl) << Callee;
+    return false;
+  }
+  if (Pref == Sema::CFP_WrongSide) {
+    // We have to do this odd dance to create our PartialDiagnostic because we
+    // want its storage to be allocated with operator new, not in an arena.
+    PartialDiagnostic PD{PartialDiagnostic::NullDiagnostic()};
+    PD.Reset(diag::err_ref_bad_target);
+    PD << IdentifyCUDATarget(Callee) << Callee << IdentifyCUDATarget(Caller);
+    Caller->addDeferredDiag({Loc, std::move(PD)});
+    Diag(Callee->getLocation(), diag::note_previous_decl) << Callee;
+    // This is not immediately an error, so return true.  The deferred errors
+    // will be emitted if and when Caller is codegen'ed.
+    return true;
+  }
+  return true;
+}
diff --git a/lib/Sema/SemaDeclCXX.cpp b/lib/Sema/SemaDeclCXX.cpp
index 0dc6876923..585cd04d06 100644
--- a/lib/Sema/SemaDeclCXX.cpp
+++ b/lib/Sema/SemaDeclCXX.cpp
@@ -12274,6 +12274,8 @@ Sema::BuildCXXConstructExpr(SourceLocation ConstructLoc, QualType DeclInitType,
              DeclInitType->getBaseElementTypeUnsafe()->getAsCXXRecordDecl()) &&
          "given constructor for wrong type");
   MarkFunctionReferenced(ConstructLoc, Constructor);
+  if (getLangOpts().CUDA && !CheckCUDACall(ConstructLoc, Constructor))
+    return ExprError();
 
   return CXXConstructExpr::Create(
       Context, DeclInitType, ConstructLoc, Constructor, Elidable,
diff --git a/lib/Sema/SemaExpr.cpp b/lib/Sema/SemaExpr.cpp
index 201248b3b3..78fa995e25 100644
--- a/lib/Sema/SemaExpr.cpp
+++ b/lib/Sema/SemaExpr.cpp
@@ -1739,17 +1739,9 @@ Sema::BuildDeclRefExpr(ValueDecl *D, QualType Ty, ExprValueKind VK,
                        const CXXScopeSpec *SS, NamedDecl *FoundD,
                        const TemplateArgumentListInfo *TemplateArgs) {
   if (getLangOpts().CUDA)
-    if (const FunctionDecl *Caller = dyn_cast<FunctionDecl>(CurContext))
-      if (const FunctionDecl *Callee = dyn_cast<FunctionDecl>(D)) {
-        if (!IsAllowedCUDACall(Caller, Callee)) {
-          Diag(NameInfo.getLoc(), diag::err_ref_bad_target)
-            << IdentifyCUDATarget(Callee) << D->getIdentifier()
-            << IdentifyCUDATarget(Caller);
-          Diag(D->getLocation(), diag::note_previous_decl)
-            << D->getIdentifier();
-          return ExprError();
-        }
-      }
+    if (FunctionDecl *Callee = dyn_cast<FunctionDecl>(D))
+      if (!CheckCUDACall(NameInfo.getLoc(), Callee))
+        return ExprError();
 
   bool RefersToCapturedVariable =
       isa<VarDecl>(D) &&
@@ -5138,37 +5130,35 @@ static bool isNumberOfArgsValidForCall(Sema &S, const FunctionDecl *Callee,
   return Callee->getMinRequiredArguments() <= NumArgs;
 }
 
-/// ActOnCallExpr - Handle a call to Fn with the specified array of arguments.
-/// This provides the location of the left/right parens and a list of comma
-/// locations.
-ExprResult
-Sema::ActOnCallExpr(Scope *S, Expr *Fn, SourceLocation LParenLoc,
-                    MultiExprArg ArgExprs, SourceLocation RParenLoc,
-                    Expr *ExecConfig, bool IsExecConfig) {
+static ExprResult ActOnCallExprImpl(Sema &S, Scope *Scope, Expr *Fn,
+                                    SourceLocation LParenLoc,
+                                    MultiExprArg ArgExprs,
+                                    SourceLocation RParenLoc, Expr *ExecConfig,
+                                    bool IsExecConfig) {
   // Since this might be a postfix expression, get rid of ParenListExprs.
-  ExprResult Result = MaybeConvertParenListExprToParenExpr(S, Fn);
+  ExprResult Result = S.MaybeConvertParenListExprToParenExpr(Scope, Fn);
   if (Result.isInvalid()) return ExprError();
   Fn = Result.get();
 
-  if (checkArgsForPlaceholders(*this, ArgExprs))
+  if (checkArgsForPlaceholders(S, ArgExprs))
     return ExprError();
 
-  if (getLangOpts().CPlusPlus) {
+  if (S.getLangOpts().CPlusPlus) {
     // If this is a pseudo-destructor expression, build the call immediately.
     if (isa<CXXPseudoDestructorExpr>(Fn)) {
       if (!ArgExprs.empty()) {
         // Pseudo-destructor calls should not have any arguments.
-        Diag(Fn->getLocStart(), diag::err_pseudo_dtor_call_with_args)
-          << FixItHint::CreateRemoval(
-                                    SourceRange(ArgExprs.front()->getLocStart(),
-                                                ArgExprs.back()->getLocEnd()));
+        S.Diag(Fn->getLocStart(), diag::err_pseudo_dtor_call_with_args)
+            << FixItHint::CreateRemoval(
+                   SourceRange(ArgExprs.front()->getLocStart(),
+                               ArgExprs.back()->getLocEnd()));
       }
 
-      return new (Context)
-          CallExpr(Context, Fn, None, Context.VoidTy, VK_RValue, RParenLoc);
+      return new (S.Context)
+          CallExpr(S.Context, Fn, None, S.Context.VoidTy, VK_RValue, RParenLoc);
     }
-    if (Fn->getType() == Context.PseudoObjectTy) {
-      ExprResult result = CheckPlaceholderExpr(Fn);
+    if (Fn->getType() == S.Context.PseudoObjectTy) {
+      ExprResult result = S.CheckPlaceholderExpr(Fn);
       if (result.isInvalid()) return ExprError();
       Fn = result.get();
     }
@@ -5183,50 +5173,53 @@ Sema::ActOnCallExpr(Scope *S, Expr *Fn, SourceLocation LParenLoc,
 
     if (Dependent) {
       if (ExecConfig) {
-        return new (Context) CUDAKernelCallExpr(
-            Context, Fn, cast<CallExpr>(ExecConfig), ArgExprs,
-            Context.DependentTy, VK_RValue, RParenLoc);
+        return new (S.Context) CUDAKernelCallExpr(
+            S.Context, Fn, cast<CallExpr>(ExecConfig), ArgExprs,
+            S.Context.DependentTy, VK_RValue, RParenLoc);
       } else {
-        return new (Context) CallExpr(
-            Context, Fn, ArgExprs, Context.DependentTy, VK_RValue, RParenLoc);
+        return new (S.Context)
+            CallExpr(S.Context, Fn, ArgExprs, S.Context.DependentTy, VK_RValue,
+                     RParenLoc);
       }
     }
 
     // Determine whether this is a call to an object (C++ [over.call.object]).
     if (Fn->getType()->isRecordType())
-      return BuildCallToObjectOfClassType(S, Fn, LParenLoc, ArgExprs,
-                                          RParenLoc);
+      return S.BuildCallToObjectOfClassType(Scope, Fn, LParenLoc, ArgExprs,
+                                            RParenLoc);
 
-    if (Fn->getType() == Context.UnknownAnyTy) {
-      ExprResult result = rebuildUnknownAnyFunction(*this, Fn);
+    if (Fn->getType() == S.Context.UnknownAnyTy) {
+      ExprResult result = rebuildUnknownAnyFunction(S, Fn);
       if (result.isInvalid()) return ExprError();
       Fn = result.get();
     }
 
-    if (Fn->getType() == Context.BoundMemberTy) {
-      return BuildCallToMemberFunction(S, Fn, LParenLoc, ArgExprs, RParenLoc);
+    if (Fn->getType() == S.Context.BoundMemberTy) {
+      return S.BuildCallToMemberFunction(Scope, Fn, LParenLoc, ArgExprs,
+                                         RParenLoc);
     }
   }
 
   // Check for overloaded calls.  This can happen even in C due to extensions.
-  if (Fn->getType() == Context.OverloadTy) {
+  if (Fn->getType() == S.Context.OverloadTy) {
     OverloadExpr::FindResult find = OverloadExpr::find(Fn);
 
-    // We aren't supposed to apply this logic for if there's an '&' involved.
+    // We aren't supposed to apply this logic for if there'Scope an '&'
+    // involved.
     if (!find.HasFormOfMemberPointer) {
       OverloadExpr *ovl = find.Expression;
       if (UnresolvedLookupExpr *ULE = dyn_cast<UnresolvedLookupExpr>(ovl))
-        return BuildOverloadedCallExpr(S, Fn, ULE, LParenLoc, ArgExprs,
-                                       RParenLoc, ExecConfig,
-                                       /*AllowTypoCorrection=*/true,
-                                       find.IsAddressOfOperand);
-      return BuildCallToMemberFunction(S, Fn, LParenLoc, ArgExprs, RParenLoc);
+        return S.BuildOverloadedCallExpr(
+            Scope, Fn, ULE, LParenLoc, ArgExprs, RParenLoc, ExecConfig,
+            /*AllowTypoCorrection=*/true, find.IsAddressOfOperand);
+      return S.BuildCallToMemberFunction(Scope, Fn, LParenLoc, ArgExprs,
+                                         RParenLoc);
     }
   }
 
   // If we're directly calling a function, get the appropriate declaration.
-  if (Fn->getType() == Context.UnknownAnyTy) {
-    ExprResult result = rebuildUnknownAnyFunction(*this, Fn);
+  if (Fn->getType() == S.Context.UnknownAnyTy) {
+    ExprResult result = rebuildUnknownAnyFunction(S, Fn);
     if (result.isInvalid()) return ExprError();
     Fn = result.get();
   }
@@ -5250,12 +5243,12 @@ Sema::ActOnCallExpr(Scope *S, Expr *Fn, SourceLocation LParenLoc,
       // Rewrite the function decl for this builtin by replacing parameters
       // with no explicit address space with the address space of the arguments
       // in ArgExprs.
-      if ((FDecl = rewriteBuiltinFunctionDecl(this, Context, FDecl, ArgExprs))) {
+      if ((FDecl =
+               rewriteBuiltinFunctionDecl(&S, S.Context, FDecl, ArgExprs))) {
         NDecl = FDecl;
-        Fn = DeclRefExpr::Create(Context, FDecl->getQualifierLoc(),
-                           SourceLocation(), FDecl, false,
-                           SourceLocation(), FDecl->getType(),
-                           Fn->getValueKind(), FDecl);
+        Fn = DeclRefExpr::Create(
+            S.Context, FDecl->getQualifierLoc(), SourceLocation(), FDecl, false,
+            SourceLocation(), FDecl->getType(), Fn->getValueKind(), FDecl);
       }
     }
   } else if (isa<MemberExpr>(NakedFn))
@@ -5263,8 +5256,8 @@ Sema::ActOnCallExpr(Scope *S, Expr *Fn, SourceLocation LParenLoc,
 
   if (FunctionDecl *FD = dyn_cast_or_null<FunctionDecl>(NDecl)) {
     if (CallingNDeclIndirectly &&
-        !checkAddressOfFunctionIsAvailable(FD, /*Complain=*/true,
-                                           Fn->getLocStart()))
+        !S.checkAddressOfFunctionIsAvailable(FD, /*Complain=*/true,
+                                             Fn->getLocStart()))
       return ExprError();
 
     // CheckEnableIf assumes that the we're passing in a sane number of args for
@@ -5274,22 +5267,42 @@ Sema::ActOnCallExpr(Scope *S, Expr *Fn, SourceLocation LParenLoc,
     // number of args looks incorrect, don't do enable_if checks; we should've
     // already emitted an error about the bad call.
     if (FD->hasAttr<EnableIfAttr>() &&
-        isNumberOfArgsValidForCall(*this, FD, ArgExprs.size())) {
-      if (const EnableIfAttr *Attr = CheckEnableIf(FD, ArgExprs, true)) {
-        Diag(Fn->getLocStart(),
-             isa<CXXMethodDecl>(FD) ?
-                 diag::err_ovl_no_viable_member_function_in_call :
-                 diag::err_ovl_no_viable_function_in_call)
-          << FD << FD->getSourceRange();
-        Diag(FD->getLocation(),
-             diag::note_ovl_candidate_disabled_by_enable_if_attr)
+        isNumberOfArgsValidForCall(S, FD, ArgExprs.size())) {
+      if (const EnableIfAttr *Attr = S.CheckEnableIf(FD, ArgExprs, true)) {
+        S.Diag(Fn->getLocStart(),
+               isa<CXXMethodDecl>(FD)
+                   ? diag::err_ovl_no_viable_member_function_in_call
+                   : diag::err_ovl_no_viable_function_in_call)
+            << FD << FD->getSourceRange();
+        S.Diag(FD->getLocation(),
+               diag::note_ovl_candidate_disabled_by_enable_if_attr)
             << Attr->getCond()->getSourceRange() << Attr->getMessage();
       }
     }
   }
 
-  return BuildResolvedCallExpr(Fn, NDecl, LParenLoc, ArgExprs, RParenLoc,
-                               ExecConfig, IsExecConfig);
+  return S.BuildResolvedCallExpr(Fn, NDecl, LParenLoc, ArgExprs, RParenLoc,
+                                 ExecConfig, IsExecConfig);
+}
+
+/// ActOnCallExpr - Handle a call to Fn with the specified array of arguments.
+/// This provides the location of the left/right parens and a list of comma
+/// locations.
+ExprResult Sema::ActOnCallExpr(Scope *S, Expr *Fn, SourceLocation LParenLoc,
+                               MultiExprArg ArgExprs, SourceLocation RParenLoc,
+                               Expr *ExecConfig, bool IsExecConfig) {
+  ExprResult Ret = ActOnCallExprImpl(*this, S, Fn, LParenLoc, ArgExprs,
+                                     RParenLoc, ExecConfig, IsExecConfig);
+
+  // If appropriate, check that this is a valid CUDA call (and emit an error if
+  // the call is not allowed).
+  if (getLangOpts().CUDA && Ret.isUsable())
+    if (auto *Call = dyn_cast<CallExpr>(Ret.get()))
+      if (auto *FD = Call->getDirectCallee())
+        if (!CheckCUDACall(Call->getLocStart(), FD))
+          return ExprError();
+
+  return Ret;
 }
 
 /// ActOnAsTypeExpr - create a new asType (bitcast) from the arguments.
diff --git a/lib/Sema/SemaOverload.cpp b/lib/Sema/SemaOverload.cpp
index 91a2478914..72ad9a4d71 100644
--- a/lib/Sema/SemaOverload.cpp
+++ b/lib/Sema/SemaOverload.cpp
@@ -12331,19 +12331,6 @@ Sema::BuildCallToMemberFunction(Scope *S, Expr *MemExprE,
     new (Context) CXXMemberCallExpr(Context, MemExprE, Args,
                                     ResultType, VK, RParenLoc);
 
-  // (CUDA B.1): Check for invalid calls between targets.
-  if (getLangOpts().CUDA) {
-    if (const FunctionDecl *Caller = dyn_cast<FunctionDecl>(CurContext)) {
-      if (!IsAllowedCUDACall(Caller, Method)) {
-        Diag(MemExpr->getMemberLoc(), diag::err_ref_bad_target)
-            << IdentifyCUDATarget(Method) << Method->getIdentifier()
-            << IdentifyCUDATarget(Caller);
-        Diag(Method->getLocation(), diag::note_previous_decl) << Method;
-        return ExprError();
-      }
-    }
-  }
-
   // Check for a valid return type.
   if (CheckCallReturnType(Method->getReturnType(), MemExpr->getMemberLoc(),
                           TheCall, Method))
diff --git a/test/CodeGenCUDA/host-device-calls-host.cu b/test/CodeGenCUDA/host-device-calls-host.cu
deleted file mode 100644
index 94796a3c23..0000000000
--- a/test/CodeGenCUDA/host-device-calls-host.cu
+++ /dev/null
@@ -1,32 +0,0 @@
-// RUN: %clang_cc1 %s -triple nvptx-unknown-unknown -fcuda-is-device -Wno-cuda-compat -emit-llvm -o - | FileCheck %s
-
-#include "Inputs/cuda.h"
-
-extern "C"
-void host_function() {}
-
-// CHECK-LABEL: define void @hd_function_a
-extern "C"
-__host__ __device__ void hd_function_a() {
-  // CHECK: call void @host_function
-  host_function();
-}
-
-// CHECK: declare void @host_function
-
-// CHECK-LABEL: define void @hd_function_b
-extern "C"
-__host__ __device__ void hd_function_b(bool b) { if (b) host_function(); }
-
-// CHECK-LABEL: define void @device_function_b
-extern "C"
-__device__ void device_function_b() { hd_function_b(false); }
-
-// CHECK-LABEL: define void @global_function
-extern "C"
-__global__ void global_function() {
-  // CHECK: call void @device_function_b
-  device_function_b();
-}
-
-// CHECK: !{{[0-9]+}} = !{void ()* @global_function, !"kernel", i32 1}
diff --git a/test/SemaCUDA/Inputs/cuda.h b/test/SemaCUDA/Inputs/cuda.h
index 18cafdf96a..d054670459 100644
--- a/test/SemaCUDA/Inputs/cuda.h
+++ b/test/SemaCUDA/Inputs/cuda.h
@@ -21,4 +21,9 @@ typedef struct cudaStream *cudaStream_t;
 
 int cudaConfigureCall(dim3 gridSize, dim3 blockSize, size_t sharedSize = 0,
                       cudaStream_t stream = 0);
+
+// Device-side placement new overloads.
+__device__ void *operator new(__SIZE_TYPE__, void *p) { return p; }
+__device__ void *operator new[](__SIZE_TYPE__, void *p) { return p; }
+
 #endif // !__NVCC__
diff --git a/test/SemaCUDA/call-device-fn-from-host.cu b/test/SemaCUDA/call-device-fn-from-host.cu
new file mode 100644
index 0000000000..0984faa290
--- /dev/null
+++ b/test/SemaCUDA/call-device-fn-from-host.cu
@@ -0,0 +1,80 @@
+// RUN: %clang_cc1 %s --std=c++11 -triple x86_64-unknown-linux -emit-llvm -o - -verify
+
+// Note: This test won't work with -fsyntax-only, because some of these errors
+// are emitted during codegen.
+
+#include "Inputs/cuda.h"
+
+__device__ void device_fn() {}
+
+struct S {
+  __device__ S() {}
+  __device__ ~S() { device_fn(); }
+  int x;
+};
+
+struct T {
+  __host__ __device__ void hd() { device_fn(); }
+  // expected-error@-1 {{reference to __device__ function 'device_fn' in __host__ __device__ function}}
+
+  // No error; this is (implicitly) inline and is never called, so isn't
+  // codegen'ed.
+  __host__ __device__ void hd2() { device_fn(); }
+
+  __host__ __device__ void hd3();
+
+  __device__ void d() {}
+};
+
+__host__ __device__ void T::hd3() {
+  device_fn();
+  // expected-error@-1 {{reference to __device__ function 'device_fn' in __host__ __device__ function}}
+}
+
+template <typename T> __host__ __device__ void hd2() { device_fn(); }
+// expected-error@-1 {{reference to __device__ function 'device_fn' in __host__ __device__ function}}
+void host_fn() { hd2<int>(); }
+
+__host__ __device__ void hd() { device_fn(); }
+// expected-error@-1 {{reference to __device__ function 'device_fn' in __host__ __device__ function}}
+
+// No error because this is never instantiated.
+template <typename T> __host__ __device__ void hd3() { device_fn(); }
+
+__host__ __device__ void local_var() {
+  S s;
+  // expected-error@-1 {{reference to __device__ function 'S' in __host__ __device__ function}}
+}
+
+__host__ __device__ void placement_new(char *ptr) {
+  ::new(ptr) S();
+  // expected-error@-1 {{reference to __device__ function 'S' in __host__ __device__ function}}
+}
+
+__host__ __device__ void explicit_destructor(S *s) {
+  s->~S();
+  // expected-error@-1 {{reference to __device__ function '~S' in __host__ __device__ function}}
+}
+
+__host__ __device__ void hd_member_fn() {
+  T t;
+  // Necessary to trigger an error on T::hd.  It's (implicitly) inline, so
+  // isn't codegen'ed until we call it.
+  t.hd();
+}
+
+__host__ __device__ void h_member_fn() {
+  T t;
+  t.d();
+  // expected-error@-1 {{reference to __device__ function 'd' in __host__ __device__ function}}
+}
+
+__host__ __device__ void fn_ptr() {
+  auto* ptr = &device_fn;
+  // expected-error@-1 {{reference to __device__ function 'device_fn' in __host__ __device__ function}}
+}
+
+template <typename T>
+__host__ __device__ void fn_ptr_template() {
+  auto* ptr = &device_fn;  // Not an error because the template isn't instantiated.
+}
diff --git a/test/SemaCUDA/call-host-fn-from-device.cu b/test/SemaCUDA/call-host-fn-from-device.cu
new file mode 100644
index 0000000000..ea7a4cce8d
--- /dev/null
+++ b/test/SemaCUDA/call-host-fn-from-device.cu
@@ -0,0 +1,84 @@
+// RUN: %clang_cc1 %s --std=c++11 -triple nvptx-unknown-unknown -fcuda-is-device -emit-llvm -o - -verify
+
+// Note: This test won't work with -fsyntax-only, because some of these errors
+// are emitted during codegen.
+
+#include "Inputs/cuda.h"
+
+extern "C" void host_fn() {}
+
+struct S {
+  S() {}
+  ~S() { host_fn(); }
+  int x;
+};
+
+struct T {
+  __host__ __device__ void hd() { host_fn(); }
+  // expected-error@-1 {{reference to __host__ function 'host_fn' in __host__ __device__ function}}
+
+  // No error; this is (implicitly) inline and is never called, so isn't
+  // codegen'ed.
+  __host__ __device__ void hd2() { host_fn(); }
+
+  __host__ __device__ void hd3();
+
+  void h() {}
+};
+
+__host__ __device__ void T::hd3() {
+  host_fn();
+  // expected-error@-1 {{reference to __host__ function 'host_fn' in __host__ __device__ function}}
+}
+
+template <typename T> __host__ __device__ void hd2() { host_fn(); }
+// expected-error@-1 {{reference to __host__ function 'host_fn' in __host__ __device__ function}}
+__global__ void kernel() { hd2<int>(); }
+
+__host__ __device__ void hd() { host_fn(); }
+// expected-error@-1 {{reference to __host__ function 'host_fn' in __host__ __device__ function}}
+
+template <typename T> __host__ __device__ void hd3() { host_fn(); }
+// expected-error@-1 {{reference to __host__ function 'host_fn' in __host__ __device__ function}}
+__device__ void device_fn() { hd3<int>(); }
+
+// No error because this is never instantiated.
+template <typename T> __host__ __device__ void hd4() { host_fn(); }
+
+__host__ __device__ void local_var() {
+  S s;
+  // expected-error@-1 {{reference to __host__ function 'S' in __host__ __device__ function}}
+}
+
+__host__ __device__ void placement_new(char *ptr) {
+  ::new(ptr) S();
+  // expected-error@-1 {{reference to __host__ function 'S' in __host__ __device__ function}}
+}
+
+__host__ __device__ void explicit_destructor(S *s) {
+  s->~S();
+  // expected-error@-1 {{reference to __host__ function '~S' in __host__ __device__ function}}
+}
+
+__host__ __device__ void hd_member_fn() {
+  T t;
+  // Necessary to trigger an error on T::hd.  It's (implicitly) inline, so
+  // isn't codegen'ed until we call it.
+  t.hd();
+}
+
+__host__ __device__ void h_member_fn() {
+  T t;
+  t.h();
+  // expected-error@-1 {{reference to __host__ function 'h' in __host__ __device__ function}}
+}
+
+__host__ __device__ void fn_ptr() {
+  auto* ptr = &host_fn;
+  // expected-error@-1 {{reference to __host__ function 'host_fn' in __host__ __device__ function}}
+}
+
+template <typename T>
+__host__ __device__ void fn_ptr_template() {
+  auto* ptr = &host_fn;  // Not an error because the template isn't instantiated.
+}