/// \brief Allocator for partial diagnostics.
PartialDiagnostic::StorageAllocator DiagAllocator;
- /// Diagnostics that are emitted if and only if the given function is
- /// codegen'ed. Access these through FunctionDecl::addDeferredDiag() and
- /// FunctionDecl::takeDeferredDiags().
- llvm::DenseMap<const FunctionDecl *, std::vector<PartialDiagnosticAt>>
- DeferredDiags;
-
/// \brief The current C++ ABI.
std::unique_ptr<CXXABI> ABI;
CXXABI *createCXXABI(const TargetInfo &T);
return DiagAllocator;
}
- decltype(DeferredDiags) &getDeferredDiags() { return DeferredDiags; }
- const decltype(DeferredDiags) &getDeferredDiags() const {
- return DeferredDiags;
- }
-
const TargetInfo &getTargetInfo() const { return *Target; }
const TargetInfo *getAuxTargetInfo() const { return AuxTarget; }
/// returns 0.
unsigned getMemoryFunctionKind() const;
- /// Add a diagnostic to be emitted if and when this function is codegen'ed.
- void addDeferredDiag(PartialDiagnosticAt PD);
-
- /// Gets this object's list of deferred diagnostics, if there are any.
- ///
- /// Although this is logically const, it clears our list of deferred diags.
- std::vector<PartialDiagnosticAt> takeDeferredDiags() const;
-
// Implement isa/cast/dyncast/etc.
static bool classof(const Decl *D) { return classofKind(D->getKind()); }
static bool classofKind(Kind K) {
/// before incrementing, so you can emit an error.
bool PopForceCUDAHostDevice();
+ /// Diagnostics that are emitted only if we discover that the given function
+ /// must be codegen'ed. Because handling these correctly adds overhead to
+ /// compilation, this is currently only enabled for CUDA compilations.
+ llvm::DenseMap<const FunctionDecl *, std::vector<PartialDiagnosticAt>>
+ CUDADeferredDiags;
+
+ /// Raw encodings of SourceLocations for which CheckCUDACall has emitted a
+ /// (maybe deferred) "bad call" diagnostic. We use this to avoid emitting the
+ /// same deferred diag twice.
+ llvm::DenseSet<unsigned> LocsWithCUDACallDiags;
+
+ /// The set of CUDA functions that, by tracing the call graph, we've
+ /// discovered must be emitted. Functions that we can tell a priori must be
+ /// emitted aren't added to this set.
+ llvm::DenseSet<FunctionDecl *> CUDAKnownEmittedFns;
+
+ /// A partial call graph maintained during CUDA compilation to support
+ /// deferred diagnostics. Specifically, functions are only added here if, at
+ /// the time they're added, they are not known-emitted. As soon as we
+ /// discover that a function is known-emitted, we remove it and everything it
+ /// transitively calls from this set and add those functions to
+ /// CUDAKnownEmittedFns.
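+ ///
+ /// For illustration, a hypothetical snapshot (names invented): if the HD
+ /// function hd1() calls hd2() and neither is yet known-emitted, this map
+ /// holds {hd1 -> {hd2}}. If we later see a __global__ kernel call hd1(),
+ /// hd1 becomes known-emitted, so we drain its entry, move hd1 and hd2 into
+ /// CUDAKnownEmittedFns, and emit any diagnostics deferred on them.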
+ llvm::DenseMap<FunctionDecl *, llvm::SetVector<FunctionDecl *>> CUDACallGraph;
+
/// Diagnostic builder for CUDA errors which may or may not be deferred.
///
/// In CUDA, there exist constructs (e.g. variable-length arrays, try/catch)
private:
struct PartialDiagnosticInfo {
- PartialDiagnosticInfo(SourceLocation Loc, PartialDiagnostic PD,
+ PartialDiagnosticInfo(Sema &S, SourceLocation Loc, PartialDiagnostic PD,
FunctionDecl *Fn)
- : Loc(Loc), PD(std::move(PD)), Fn(Fn) {}
+ : S(S), Loc(Loc), PD(std::move(PD)), Fn(Fn) {}
- ~PartialDiagnosticInfo() { Fn->addDeferredDiag({Loc, std::move(PD)}); }
+ ~PartialDiagnosticInfo() {
+ S.CUDADeferredDiags[Fn].push_back({Loc, std::move(PD)});
+ }
+ Sema &S;
SourceLocation Loc;
PartialDiagnostic PD;
FunctionDecl *Fn;
/// - If CurContext is a __device__ or __global__ function, emits the
/// diagnostics immediately.
/// - If CurContext is a __host__ __device__ function and we are compiling for
- /// the device, creates a deferred diagnostic which is emitted if and when
- /// the function is codegen'ed.
+ /// the device, creates a diagnostic which is emitted if and when we realize
+ /// that the function will be codegen'ed.
///
/// Example usage:
///
void maybeAddCUDAHostDeviceAttrs(Scope *S, FunctionDecl *FD,
const LookupResult &Previous);
-private:
- /// Raw encodings of SourceLocations for which CheckCUDACall has emitted a
- /// (maybe deferred) "bad call" diagnostic. We use this to avoid emitting the
- /// same deferred diag twice.
- llvm::DenseSet<unsigned> LocsWithCUDACallDiags;
-
public:
/// Check whether we're allowed to call Callee from the current context.
///
return 0;
}
-void FunctionDecl::addDeferredDiag(PartialDiagnosticAt PD) {
- getASTContext().getDeferredDiags()[this].push_back(std::move(PD));
-}
-
-std::vector<PartialDiagnosticAt> FunctionDecl::takeDeferredDiags() const {
- auto &DD = getASTContext().getDeferredDiags();
- auto It = DD.find(this);
- if (It == DD.end())
- return {};
- auto Ret = std::move(It->second);
- DD.erase(It);
- return Ret;
-}
-
//===----------------------------------------------------------------------===//
// FieldDecl Implementation
//===----------------------------------------------------------------------===//
EmitVersionIdentMetadata();
EmitTargetMetadata();
-
- // Emit any deferred diagnostics gathered during codegen. We didn't emit them
- // when we first discovered them because that would have halted codegen,
- // preventing us from gathering other deferred diags.
- for (const PartialDiagnosticAt &DiagAt : DeferredDiags) {
- SourceLocation Loc = DiagAt.first;
- const PartialDiagnostic &PD = DiagAt.second;
- DiagnosticBuilder Builder(getDiags().Report(Loc, PD.getDiagID()));
- PD.Emit(Builder);
- }
- // Clear the deferred diags so they don't outlive the ASTContext's
- // PartialDiagnostic allocator.
- DeferredDiags.clear();
}
void CodeGenModule::UpdateCompletedType(const TagDecl *TD) {
llvm::GlobalValue *GV) {
const auto *D = cast<FunctionDecl>(GD.getDecl());
- // Emit this function's deferred diagnostics, if none of them are errors. If
- // any of them are errors, don't codegen the function, but also don't emit any
- // of the diagnostics just yet. Emitting an error during codegen stops
- // further codegen, and we want to display as many deferred diags as possible.
- // We'll emit the now twice-deferred diags at the very end of codegen.
- //
- // (If a function has both error and non-error diags, we don't emit the
- // non-error diags here, because order can be significant, e.g. with notes
- // that follow errors.)
- auto Diags = D->takeDeferredDiags();
- if (auto *Templ = D->getPrimaryTemplate()) {
- auto TemplDiags = Templ->getAsFunction()->takeDeferredDiags();
- Diags.insert(Diags.end(), TemplDiags.begin(), TemplDiags.end());
- }
- bool HasError = llvm::any_of(Diags, [this](const PartialDiagnosticAt &PDAt) {
- return getDiags().getDiagnosticLevel(PDAt.second.getDiagID(), PDAt.first) >=
- DiagnosticsEngine::Error;
- });
- if (HasError) {
- DeferredDiags.insert(DeferredDiags.end(),
- std::make_move_iterator(Diags.begin()),
- std::make_move_iterator(Diags.end()));
- return;
- }
- for (PartialDiagnosticAt &PDAt : Diags) {
- const SourceLocation &Loc = PDAt.first;
- const PartialDiagnostic &PD = PDAt.second;
- DiagnosticBuilder Builder(getDiags().Report(Loc, PD.getDiagID()));
- PD.Emit(Builder);
- }
-
// Compute the function info and LLVM type.
const CGFunctionInfo &FI = getTypes().arrangeGlobalDeclaration(GD);
llvm::FunctionType *Ty = getTypes().GetFunctionType(FI);
/// MDNodes.
llvm::DenseMap<QualType, llvm::Metadata *> MetadataIdMap;
- /// Diags gathered from FunctionDecl::takeDeferredDiags(). Emitted at the
- /// very end of codegen.
- std::vector<std::pair<SourceLocation, PartialDiagnostic>> DeferredDiags;
-
public:
CodeGenModule(ASTContext &C, const HeaderSearchOptions &headersearchopts,
const PreprocessorOptions &ppopts,
break;
case K_Deferred:
assert(Fn && "Must have a function to attach the deferred diag to.");
- PartialDiagInfo.emplace(Loc, S.PDiag(DiagID), Fn);
+ PartialDiagInfo.emplace(S, Loc, S.PDiag(DiagID), Fn);
break;
}
}
+// In CUDA, there are some constructs which may appear in semantically-valid
+// code, but trigger errors if we ever generate code for the function in which
+// they appear. Essentially every construct you're not allowed to use on the
+// device falls into this category, because you are allowed to use these
+// constructs in a __host__ __device__ function, but only if that function is
+// never codegen'ed on the device.
+//
+// To handle semantic checking for these constructs, we keep track of the set of
+// functions we know will be emitted, either because we could tell a priori that
+// they would be emitted, or because they were transitively called by a
+// known-emitted function.
+//
+// We also keep a partial call graph of which not-known-emitted functions call
+// which other not-known-emitted functions.
+//
+// When we see something that is illegal if the current function is emitted
+// (usually by way of CUDADiagIfDeviceCode, CUDADiagIfHostCode, or
+// CheckCUDACall), we first check whether the current function is
+// known-emitted. If so, we emit the diagnostic immediately.
+//
+// Otherwise, we "defer" the diagnostic. It sits in Sema::CUDADeferredDiags
+// until we discover that the function is known-emitted, at which point we take
+// it out of this map and emit the diagnostic.
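+//
+// For example (hypothetical code, not from this patch):
+//
+//   __host__ __device__ void hd() { throw 0; }  // Illegal in device code.
+//   __global__ void kernel() { hd(); }
+//
+// When we check hd() itself, it is not yet known-emitted, so the error about
+// the throw is deferred. When we later see the call from kernel(), which is
+// always emitted in device compilation, hd() becomes known-emitted and the
+// deferred error is finally emitted.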
+
+// Do we know that we will eventually codegen the given function?
+static bool IsKnownEmitted(Sema &S, FunctionDecl *FD) {
+ // Templates are emitted when they're instantiated.
+ if (FD->isDependentContext())
+ return false;
+
+ // When compiling for device, host functions are never emitted. Similarly,
+ // when compiling for host, device and global functions are never emitted.
+ // (Technically, we do emit a host-side stub for global functions, but this
+ // doesn't count for our purposes here.)
+ Sema::CUDAFunctionTarget T = S.IdentifyCUDATarget(FD);
+ if (S.getLangOpts().CUDAIsDevice && T == Sema::CFT_Host)
+ return false;
+ if (!S.getLangOpts().CUDAIsDevice &&
+ (T == Sema::CFT_Device || T == Sema::CFT_Global))
+ return false;
+
+ // Externally visible and similar functions are always emitted.
+ if (S.getASTContext().GetGVALinkageForFunction(FD) > GVA_DiscardableODR)
+ return true;
+
+ // Otherwise, the function is known-emitted if it's in our set of
+ // known-emitted functions.
+ return S.CUDAKnownEmittedFns.count(FD) > 0;
+}
+
Sema::CUDADiagBuilder Sema::CUDADiagIfDeviceCode(SourceLocation Loc,
unsigned DiagID) {
assert(getLangOpts().CUDA && "Should only be called during CUDA compilation");
- CUDADiagBuilder::Kind DiagKind;
- switch (CurrentCUDATarget()) {
- case CFT_Global:
- case CFT_Device:
- DiagKind = CUDADiagBuilder::K_Immediate;
- break;
- case CFT_HostDevice:
- DiagKind = getLangOpts().CUDAIsDevice ? CUDADiagBuilder::K_Deferred
- : CUDADiagBuilder::K_Nop;
- break;
- default:
- DiagKind = CUDADiagBuilder::K_Nop;
- }
+ CUDADiagBuilder::Kind DiagKind = [&] {
+ switch (CurrentCUDATarget()) {
+ case CFT_Global:
+ case CFT_Device:
+ return CUDADiagBuilder::K_Immediate;
+ case CFT_HostDevice:
+ // An HD function counts as host code if we're compiling for host, and
+ // device code if we're compiling for device. Defer any errors in device
+ // mode until the function is known-emitted.
+ if (getLangOpts().CUDAIsDevice) {
+ return IsKnownEmitted(*this, dyn_cast<FunctionDecl>(CurContext))
+ ? CUDADiagBuilder::K_Immediate
+ : CUDADiagBuilder::K_Deferred;
+ }
+ return CUDADiagBuilder::K_Nop;
+
+ default:
+ return CUDADiagBuilder::K_Nop;
+ }
+ }();
return CUDADiagBuilder(DiagKind, Loc, DiagID,
dyn_cast<FunctionDecl>(CurContext), *this);
}
Sema::CUDADiagBuilder Sema::CUDADiagIfHostCode(SourceLocation Loc,
unsigned DiagID) {
assert(getLangOpts().CUDA && "Should only be called during CUDA compilation");
- CUDADiagBuilder::Kind DiagKind;
- switch (CurrentCUDATarget()) {
- case CFT_Host:
- DiagKind = CUDADiagBuilder::K_Immediate;
- break;
- case CFT_HostDevice:
- DiagKind = getLangOpts().CUDAIsDevice ? CUDADiagBuilder::K_Nop
- : CUDADiagBuilder::K_Deferred;
- break;
- default:
- DiagKind = CUDADiagBuilder::K_Nop;
- }
+ CUDADiagBuilder::Kind DiagKind = [&] {
+ switch (CurrentCUDATarget()) {
+ case CFT_Host:
+ return CUDADiagBuilder::K_Immediate;
+ case CFT_HostDevice:
+ // An HD function counts as host code if we're compiling for host, and
+ // device code if we're compiling for device. Defer any errors in host
+ // mode until the function is known-emitted.
+ if (getLangOpts().CUDAIsDevice)
+ return CUDADiagBuilder::K_Nop;
+
+ return IsKnownEmitted(*this, dyn_cast<FunctionDecl>(CurContext))
+ ? CUDADiagBuilder::K_Immediate
+ : CUDADiagBuilder::K_Deferred;
+ default:
+ return CUDADiagBuilder::K_Nop;
+ }
+ }();
return CUDADiagBuilder(DiagKind, Loc, DiagID,
dyn_cast<FunctionDecl>(CurContext), *this);
}
+// Emit any deferred diagnostics for FD and erase them from the map in which
+// they're stored.
+static void EmitDeferredDiags(Sema &S, FunctionDecl *FD) {
+ auto It = S.CUDADeferredDiags.find(FD);
+ if (It == S.CUDADeferredDiags.end())
+ return;
+ for (PartialDiagnosticAt &PDAt : It->second) {
+ const SourceLocation &Loc = PDAt.first;
+ const PartialDiagnostic &PD = PDAt.second;
+ DiagnosticBuilder Builder(S.Diags.Report(Loc, PD.getDiagID()));
+ Builder.setForceEmit();
+ PD.Emit(Builder);
+ }
+ S.CUDADeferredDiags.erase(It);
+}
+
+// Indicate that this function (and thus everything it transitively calls) will
+// be codegen'ed, and emit any deferred diagnostics on this function and its
+// (transitive) callees.
+static void MarkKnownEmitted(Sema &S, FunctionDecl *FD) {
+ // Nothing to do if we already know that FD is emitted.
+ if (IsKnownEmitted(S, FD)) {
+ assert(!S.CUDACallGraph.count(FD));
+ return;
+ }
+
+ // We've just discovered that FD is known-emitted. Walk our call graph to
+ // discover what else must now be emitted.
+ llvm::SmallVector<FunctionDecl *, 4> Worklist = {FD};
+ llvm::SmallSet<FunctionDecl *, 4> Seen;
+ Seen.insert(FD);
+ while (!Worklist.empty()) {
+ FunctionDecl *Caller = Worklist.pop_back_val();
+ assert(!IsKnownEmitted(S, Caller) &&
+ "Worklist should not contain known-emitted functions.");
+ S.CUDAKnownEmittedFns.insert(Caller);
+ EmitDeferredDiags(S, Caller);
+
+ // Deferred diags are often recorded on the template itself, so emit those
+ // as well.
+ if (auto *Templ = Caller->getPrimaryTemplate())
+ EmitDeferredDiags(S, Templ->getAsFunction());
+
+ // Add all functions called by Caller to our worklist.
+ auto CGIt = S.CUDACallGraph.find(Caller);
+ if (CGIt == S.CUDACallGraph.end())
+ continue;
+
+ for (FunctionDecl *Callee : CGIt->second) {
+ if (Seen.count(Callee) || IsKnownEmitted(S, Callee))
+ continue;
+ Seen.insert(Callee);
+ Worklist.push_back(Callee);
+ }
+
+ // Caller is now known-emitted, so we no longer need to maintain its list of
+ // callees in CUDACallGraph.
+ S.CUDACallGraph.erase(CGIt);
+ }
+}
+
bool Sema::CheckCUDACall(SourceLocation Loc, FunctionDecl *Callee) {
assert(getLangOpts().CUDA && "Should only be called during CUDA compilation");
assert(Callee && "Callee may not be null.");
+ // FIXME: Is bailing out early correct here? Should we instead assume that
+ // the caller is a global initializer?
FunctionDecl *Caller = dyn_cast<FunctionDecl>(CurContext);
if (!Caller)
return true;
- CUDADiagBuilder::Kind DiagKind;
- switch (IdentifyCUDAPreference(Caller, Callee)) {
- case CFP_Never:
- DiagKind = CUDADiagBuilder::K_Immediate;
- break;
- case CFP_WrongSide:
- assert(Caller && "WrongSide calls require a non-null caller");
- DiagKind = CUDADiagBuilder::K_Deferred;
- break;
- default:
- DiagKind = CUDADiagBuilder::K_Nop;
- }
+ bool CallerKnownEmitted = IsKnownEmitted(*this, Caller);
+ if (CallerKnownEmitted)
+ MarkKnownEmitted(*this, Callee);
+ else
+ CUDACallGraph[Caller].insert(Callee);
+
+ CUDADiagBuilder::Kind DiagKind = [&] {
+ switch (IdentifyCUDAPreference(Caller, Callee)) {
+ case CFP_Never:
+ return CUDADiagBuilder::K_Immediate;
+ case CFP_WrongSide:
+ assert(Caller && "WrongSide calls require a non-null caller");
+ // If we know the caller will be emitted, we know this wrong-side call
+ // will be emitted, so it's an immediate error. Otherwise, defer the
+ // error until we know the caller is emitted.
+ return CallerKnownEmitted ? CUDADiagBuilder::K_Immediate
+ : CUDADiagBuilder::K_Deferred;
+ default:
+ return CUDADiagBuilder::K_Nop;
+ }
+ }();
// Avoid emitting this error twice for the same location. Using a hashtable
// like this is unfortunate, but because we must continue parsing as normal
// RUN: %clang_cc1 -std=c++11 -fsyntax-only -fcuda-is-device -verify %s
__attribute__((device)) void device_fn() {}
-__attribute__((device)) void hd_fn() {}
+__attribute__((host, device)) void hd_fn() {}
__attribute__((device)) void device_attr() {
([]() __attribute__((device)) { device_fn(); })();
-// RUN: %clang_cc1 %s --std=c++11 -triple nvptx-unknown-unknown -fcuda-is-device -emit-llvm -o - -verify
+// RUN: %clang_cc1 %s --std=c++11 -triple nvptx-unknown-unknown -fcuda-is-device \
+// RUN: -emit-llvm -o /dev/null -verify
// Note: This test won't work with -fsyntax-only, because some of these errors
// are emitted during codegen.
DeviceReturnTy ret_d = d();
DeviceFnPtr fp_cd = cd;
DeviceReturnTy ret_cd = cd();
+#if !defined(__CUDA_ARCH__)
+ // expected-error@-5 {{reference to __device__ function 'd' in __host__ __device__ function}}
+ // expected-error@-5 {{reference to __device__ function 'd' in __host__ __device__ function}}
+ // expected-error@-5 {{reference to __device__ function 'cd' in __host__ __device__ function}}
+ // expected-error@-5 {{reference to __device__ function 'cd' in __host__ __device__ function}}
+#endif
HostFnPtr fp_h = h;
HostReturnTy ret_h = h();
HostFnPtr fp_ch = ch;
HostReturnTy ret_ch = ch();
+#if defined(__CUDA_ARCH__)
+ // expected-error@-5 {{reference to __host__ function 'h' in __host__ __device__ function}}
+ // expected-error@-5 {{reference to __host__ function 'h' in __host__ __device__ function}}
+ // expected-error@-5 {{reference to __host__ function 'ch' in __host__ __device__ function}}
+ // expected-error@-5 {{reference to __host__ function 'ch' in __host__ __device__ function}}
+#endif
CurrentFnPtr fp_dh = dh;
CurrentReturnTy ret_dh = dh();
CurrentFnPtr fp_cdh = cdh;
CurrentReturnTy ret_cdh = cdh();
- g(); // expected-error {{call to global function g not configured}}
+ g();
+#if defined (__CUDA_ARCH__)
+ // expected-error@-2 {{reference to __global__ function 'g' in __host__ __device__ function}}
+#else
+ // expected-error@-4 {{call to global function g not configured}}
+#endif
}
// Test for address of overloaded function resolution in the global context.
// If we have a mix of HD and H-only or D-only candidates in the overload set,
// normal C++ overload resolution rules apply first.
-template <typename T> TemplateReturnTy template_vs_hd_function(T arg) {
+template <typename T> TemplateReturnTy template_vs_hd_function(T arg)
+#ifdef __CUDA_ARCH__
+//expected-note@-2 {{declared here}}
+#endif
+{
return TemplateReturnTy();
}
__host__ __device__ HostDeviceReturnTy template_vs_hd_function(float arg) {
__host__ __device__ void test_host_device_calls_hd_template() {
HostDeviceReturnTy ret1 = template_vs_hd_function(1.0f);
TemplateReturnTy ret2 = template_vs_hd_function(1);
+#ifdef __CUDA_ARCH__
+ // expected-error@-2 {{reference to __host__ function 'template_vs_hd_function<int>' in __host__ __device__ function}}
+#endif
}
__host__ void test_host_calls_hd_template() {
// side of compilation.
__device__ DeviceReturnTy device_only_function(int arg) { return DeviceReturnTy(); }
__device__ DeviceReturnTy2 device_only_function(float arg) { return DeviceReturnTy2(); }
+#ifndef __CUDA_ARCH__
+ // expected-note@-3 {{'device_only_function' declared here}}
+ // expected-note@-3 {{'device_only_function' declared here}}
+#endif
__host__ HostReturnTy host_only_function(int arg) { return HostReturnTy(); }
__host__ HostReturnTy2 host_only_function(float arg) { return HostReturnTy2(); }
+#ifdef __CUDA_ARCH__
+ // expected-note@-3 {{'host_only_function' declared here}}
+ // expected-note@-3 {{'host_only_function' declared here}}
+#endif
__host__ __device__ void test_host_device_single_side_overloading() {
DeviceReturnTy ret1 = device_only_function(1);
DeviceReturnTy2 ret2 = device_only_function(1.0f);
+#ifndef __CUDA_ARCH__
+ // expected-error@-3 {{reference to __device__ function 'device_only_function' in __host__ __device__ function}}
+ // expected-error@-3 {{reference to __device__ function 'device_only_function' in __host__ __device__ function}}
+#endif
HostReturnTy ret3 = host_only_function(1);
HostReturnTy2 ret4 = host_only_function(1.0f);
+#ifdef __CUDA_ARCH__
+ // expected-error@-3 {{reference to __host__ function 'host_only_function' in __host__ __device__ function}}
+ // expected-error@-3 {{reference to __host__ function 'host_only_function' in __host__ __device__ function}}
+#endif
}
// Verify that we allow overloading function templates.
// Test 3: device method called from host function
struct S3 {
- __device__ void method() {} // expected-note {{'method' declared here}};
+ __device__ void method() {} // expected-note {{'method' declared here}}
};
void foo3(S3& s) {
// Test 4: device method called from host&device function
struct S4 {
- __device__ void method() {}
+ __device__ void method() {} // expected-note {{'method' declared here}}
};
__host__ __device__ void foo4(S4& s) {
- s.method();
+ s.method(); // expected-error {{reference to __device__ function 'method' in __host__ __device__ function}}
}
//------------------------------------------------------------------------------
__host__ __device__ fn_ptr_t get_ptr_hd() {
return kernel;
#ifdef DEVICE
- // This emits a deferred error on the device, but we don't catch it in this
- // file because the non-deferred error below precludes this.
-
- // FIXME-expected-error@-2 {{reference to __global__ function}}
+ // expected-error@-2 {{reference to __global__ function}}
#endif
}
__host__ fn_ptr_t get_ptr_h() {