From 985220f468a1bc5f8fd9ffd283427b8a486ec2bf Mon Sep 17 00:00:00 2001 From: Justin Lebar Date: Sat, 8 Oct 2016 22:15:58 +0000 Subject: [PATCH] [CUDA] Add #pragma clang force_cuda_host_device_{begin,end} pragmas. Summary: These cause us to consider all functions in-between to be __host__ __device__. You can nest these pragmas; you just can't have more 'end's than 'begin's. Reviewers: rsmith Subscribers: tra, jhen, cfe-commits Differential Revision: https://reviews.llvm.org/D24975 git-svn-id: https://llvm.org/svn/llvm-project/cfe/trunk@283677 91177308-0d34-0410-b5e6-96231b3b80d8 --- include/clang/Basic/DiagnosticParseKinds.td | 6 +++ include/clang/Parse/Parser.h | 1 + include/clang/Sema/Sema.h | 14 ++++++ include/clang/Serialization/ASTBitCodes.h | 6 ++- include/clang/Serialization/ASTReader.h | 4 ++ include/clang/Serialization/ASTWriter.h | 1 + lib/Parse/ParsePragma.cpp | 44 +++++++++++++++++++ lib/Sema/SemaCUDA.cpp | 27 ++++++++++++ lib/Serialization/ASTReader.cpp | 9 ++++ lib/Serialization/ASTWriter.cpp | 9 ++++ test/PCH/pragma-cuda-force-host-device.cu | 27 ++++++++++++ .../cuda-force-host-device-templates.cu | 41 +++++++++++++++++ test/Parser/cuda-force-host-device.cu | 36 +++++++++++++++ 13 files changed, 224 insertions(+), 1 deletion(-) create mode 100644 test/PCH/pragma-cuda-force-host-device.cu create mode 100644 test/Parser/cuda-force-host-device-templates.cu create mode 100644 test/Parser/cuda-force-host-device.cu diff --git a/include/clang/Basic/DiagnosticParseKinds.td b/include/clang/Basic/DiagnosticParseKinds.td index db713e42dd..f1878d6392 100644 --- a/include/clang/Basic/DiagnosticParseKinds.td +++ b/include/clang/Basic/DiagnosticParseKinds.td @@ -1026,6 +1026,12 @@ def warn_pragma_unroll_cuda_value_in_parens : Warning< def warn_cuda_attr_lambda_position : Warning< "nvcc does not allow '__%0__' to appear after '()' in lambdas">, InGroup; +def warn_pragma_force_cuda_host_device_bad_arg : Warning< + "incorrect use of #pragma clang force_cuda_host_device begin|end">, + InGroup; +def err_pragma_cannot_end_force_cuda_host_device : Error< + "force_cuda_host_device end pragma without matching " + "force_cuda_host_device begin">; } // end of Parse Issue category. let CategoryName = "Modules Issue" in { diff --git a/include/clang/Parse/Parser.h b/include/clang/Parse/Parser.h index b0b9b634ae..3ecbd62006 100644 --- a/include/clang/Parse/Parser.h +++ b/include/clang/Parse/Parser.h @@ -173,6 +173,7 @@ class Parser : public CodeCompletionHandler { std::unique_ptr MSSection; std::unique_ptr MSRuntimeChecks; std::unique_ptr MSIntrinsic; + std::unique_ptr CUDAForceHostDeviceHandler; std::unique_ptr OptimizeHandler; std::unique_ptr LoopHintHandler; std::unique_ptr UnrollHintHandler; diff --git a/include/clang/Sema/Sema.h b/include/clang/Sema/Sema.h index f6e8917426..8bd3916e0d 100644 --- a/include/clang/Sema/Sema.h +++ b/include/clang/Sema/Sema.h @@ -9219,6 +9219,20 @@ public: QualType FieldTy, bool IsMsStruct, Expr *BitWidth, bool *ZeroWidth = nullptr); +private: + unsigned ForceCUDAHostDeviceDepth = 0; + +public: + /// Increments our count of the number of times we've seen a pragma forcing + /// functions to be __host__ __device__. So long as this count is greater + /// than zero, all functions encountered will be __host__ __device__. + void PushForceCUDAHostDevice(); + + /// Decrements our count of the number of times we've seen a pragma forcing + /// functions to be __host__ __device__. Returns false if the count is 0 + /// before incrementing, so you can emit an error. + bool PopForceCUDAHostDevice(); + enum CUDAFunctionTarget { CFT_Device, CFT_Global, diff --git a/include/clang/Serialization/ASTBitCodes.h b/include/clang/Serialization/ASTBitCodes.h index 32e075b759..dcd1b6af1d 100644 --- a/include/clang/Serialization/ASTBitCodes.h +++ b/include/clang/Serialization/ASTBitCodes.h @@ -580,7 +580,11 @@ namespace clang { MSSTRUCT_PRAGMA_OPTIONS = 55, /// \brief Record code for \#pragma ms_struct options. - POINTERS_TO_MEMBERS_PRAGMA_OPTIONS = 56 + POINTERS_TO_MEMBERS_PRAGMA_OPTIONS = 56, + + /// \brief Number of unmatched #pragma clang cuda_force_host_device begin + /// directives we've seen. + CUDA_PRAGMA_FORCE_HOST_DEVICE_DEPTH = 57, }; /// \brief Record types used within a source manager block. diff --git a/include/clang/Serialization/ASTReader.h b/include/clang/Serialization/ASTReader.h index b05b419d23..066cb4d9de 100644 --- a/include/clang/Serialization/ASTReader.h +++ b/include/clang/Serialization/ASTReader.h @@ -772,6 +772,10 @@ private: /// Sema tracks these to emit warnings. SmallVector UnusedLocalTypedefNameCandidates; + /// \brief Our current depth in #pragma cuda force_host_device begin/end + /// macros. + unsigned ForceCUDAHostDeviceDepth = 0; + /// \brief The IDs of the declarations Sema stores directly. /// /// Sema tracks a few important decls, such as namespace std, directly. diff --git a/include/clang/Serialization/ASTWriter.h b/include/clang/Serialization/ASTWriter.h index e83dfe2536..2462900cf8 100644 --- a/include/clang/Serialization/ASTWriter.h +++ b/include/clang/Serialization/ASTWriter.h @@ -459,6 +459,7 @@ private: void WriteDeclContextVisibleUpdate(const DeclContext *DC); void WriteFPPragmaOptions(const FPOptions &Opts); void WriteOpenCLExtensions(Sema &SemaRef); + void WriteCUDAPragmas(Sema &SemaRef); void WriteObjCCategories(); void WriteLateParsedTemplates(Sema &SemaRef); void WriteOptimizePragmaOptions(Sema &SemaRef); diff --git a/lib/Parse/ParsePragma.cpp b/lib/Parse/ParsePragma.cpp index 7ae03af2b1..d6539c9610 100644 --- a/lib/Parse/ParsePragma.cpp +++ b/lib/Parse/ParsePragma.cpp @@ -167,6 +167,16 @@ struct PragmaMSIntrinsicHandler : public PragmaHandler { Token &FirstToken) override; }; +struct PragmaForceCUDAHostDeviceHandler : public PragmaHandler { + PragmaForceCUDAHostDeviceHandler(Sema &Actions) + : PragmaHandler("force_cuda_host_device"), Actions(Actions) {} + void HandlePragma(Preprocessor &PP, PragmaIntroducerKind Introducer, + Token &FirstToken) override; + +private: + Sema &Actions; +}; + } // end namespace void Parser::initializePragmaHandlers() { @@ -239,6 +249,12 @@ void Parser::initializePragmaHandlers() { PP.AddPragmaHandler(MSIntrinsic.get()); } + if (getLangOpts().CUDA) { + CUDAForceHostDeviceHandler.reset( + new PragmaForceCUDAHostDeviceHandler(Actions)); + PP.AddPragmaHandler("clang", CUDAForceHostDeviceHandler.get()); + } + OptimizeHandler.reset(new PragmaOptimizeHandler(Actions)); PP.AddPragmaHandler("clang", OptimizeHandler.get()); @@ -309,6 +325,11 @@ void Parser::resetPragmaHandlers() { MSIntrinsic.reset(); } + if (getLangOpts().CUDA) { + PP.RemovePragmaHandler("clang", CUDAForceHostDeviceHandler.get()); + CUDAForceHostDeviceHandler.reset(); + } + PP.RemovePragmaHandler("STDC", FPContractHandler.get()); FPContractHandler.reset(); @@ -2187,3 +2208,26 @@ void PragmaMSIntrinsicHandler::HandlePragma(Preprocessor &PP, PP.Diag(Tok.getLocation(), diag::warn_pragma_extra_tokens_at_eol) << "intrinsic"; } +void PragmaForceCUDAHostDeviceHandler::HandlePragma( + Preprocessor &PP, PragmaIntroducerKind Introducer, Token &Tok) { + Token FirstTok = Tok; + + PP.Lex(Tok); + IdentifierInfo *Info = Tok.getIdentifierInfo(); + if (!Info || (!Info->isStr("begin") && !Info->isStr("end"))) { + PP.Diag(FirstTok.getLocation(), + diag::warn_pragma_force_cuda_host_device_bad_arg); + return; + } + + if (Info->isStr("begin")) + Actions.PushForceCUDAHostDevice(); + else if (!Actions.PopForceCUDAHostDevice()) + PP.Diag(FirstTok.getLocation(), + diag::err_pragma_cannot_end_force_cuda_host_device); + + PP.Lex(Tok); + if (!Tok.is(tok::eod)) + PP.Diag(FirstTok.getLocation(), + diag::warn_pragma_force_cuda_host_device_bad_arg); +} diff --git a/lib/Sema/SemaCUDA.cpp b/lib/Sema/SemaCUDA.cpp index cb7019242f..d6c0606674 100644 --- a/lib/Sema/SemaCUDA.cpp +++ b/lib/Sema/SemaCUDA.cpp @@ -23,6 +23,19 @@ #include "llvm/ADT/SmallVector.h" using namespace clang; +void Sema::PushForceCUDAHostDevice() { + assert(getLangOpts().CUDA && "Should only be called during CUDA compilation"); + ForceCUDAHostDeviceDepth++; +} + +bool Sema::PopForceCUDAHostDevice() { + assert(getLangOpts().CUDA && "Should only be called during CUDA compilation"); + if (ForceCUDAHostDeviceDepth == 0) + return false; + ForceCUDAHostDeviceDepth--; + return true; +} + ExprResult Sema::ActOnCUDAExecConfigExpr(Scope *S, SourceLocation LLLLoc, MultiExprArg ExecConfig, SourceLocation GGGLoc) { @@ -441,9 +454,23 @@ bool Sema::isEmptyCudaDestructor(SourceLocation Loc, CXXDestructorDecl *DD) { // * a __device__ function with this signature was already declared, in which // case in which case we output an error, unless the __device__ decl is in a // system header, in which case we leave the constexpr function unattributed. +// +// In addition, all function decls are treated as __host__ __device__ when +// ForceCUDAHostDeviceDepth > 0 (corresponding to code within a +// #pragma clang force_cuda_host_device_begin/end +// pair). void Sema::maybeAddCUDAHostDeviceAttrs(Scope *S, FunctionDecl *NewD, const LookupResult &Previous) { assert(getLangOpts().CUDA && "Should only be called during CUDA compilation"); + + if (ForceCUDAHostDeviceDepth > 0) { + if (!NewD->hasAttr()) + NewD->addAttr(CUDAHostAttr::CreateImplicit(Context)); + if (!NewD->hasAttr()) + NewD->addAttr(CUDADeviceAttr::CreateImplicit(Context)); + return; + } + if (!getLangOpts().CUDAHostDeviceConstexpr || !NewD->isConstexpr() || NewD->isVariadic() || NewD->hasAttr() || NewD->hasAttr() || NewD->hasAttr()) diff --git a/lib/Serialization/ASTReader.cpp b/lib/Serialization/ASTReader.cpp index 6ae825b970..d12dda66db 100644 --- a/lib/Serialization/ASTReader.cpp +++ b/lib/Serialization/ASTReader.cpp @@ -3275,6 +3275,14 @@ ASTReader::ReadASTBlock(ModuleFile &F, unsigned ClientLoadCapabilities) { UnusedLocalTypedefNameCandidates.push_back( getGlobalDeclID(F, Record[I])); break; + + case CUDA_PRAGMA_FORCE_HOST_DEVICE_DEPTH: + if (Record.size() != 1) { + Error("invalid cuda pragma options record"); + return Failure; + } + ForceCUDAHostDeviceDepth = Record[0]; + break; } } } @@ -7128,6 +7136,7 @@ void ASTReader::UpdateSema() { PragmaMSPointersToMembersState, PointersToMembersPragmaLocation); } + SemaObj->ForceCUDAHostDeviceDepth = ForceCUDAHostDeviceDepth; } IdentifierInfo *ASTReader::get(StringRef Name) { diff --git a/lib/Serialization/ASTWriter.cpp b/lib/Serialization/ASTWriter.cpp index cfe04433cf..da513a7076 100644 --- a/lib/Serialization/ASTWriter.cpp +++ b/lib/Serialization/ASTWriter.cpp @@ -1069,6 +1069,7 @@ void ASTWriter::WriteBlockInfoBlock() { RECORD(POINTERS_TO_MEMBERS_PRAGMA_OPTIONS); RECORD(UNUSED_LOCAL_TYPEDEF_NAME_CANDIDATES); RECORD(DELETE_EXPRS_TO_ANALYZE); + RECORD(CUDA_PRAGMA_FORCE_HOST_DEVICE_DEPTH); // SourceManager Block. BLOCK(SOURCE_MANAGER_BLOCK); @@ -3942,6 +3943,13 @@ void ASTWriter::WriteOpenCLExtensions(Sema &SemaRef) { Stream.EmitRecord(OPENCL_EXTENSIONS, Record); } +void ASTWriter::WriteCUDAPragmas(Sema &SemaRef) { + if (SemaRef.ForceCUDAHostDeviceDepth > 0) { + RecordData::value_type Record[] = {SemaRef.ForceCUDAHostDeviceDepth}; + Stream.EmitRecord(CUDA_PRAGMA_FORCE_HOST_DEVICE_DEPTH, Record); + } +} + void ASTWriter::WriteObjCCategories() { SmallVector CategoriesMap; RecordData Categories; @@ -4619,6 +4627,7 @@ uint64_t ASTWriter::WriteASTCore(Sema &SemaRef, StringRef isysroot, WriteIdentifierTable(PP, SemaRef.IdResolver, isModule); WriteFPPragmaOptions(SemaRef.getFPOptions()); WriteOpenCLExtensions(SemaRef); + WriteCUDAPragmas(SemaRef); WritePragmaDiagnosticMappings(Context.getDiagnostics(), isModule); // If we're emitting a module, write out the submodule information. diff --git a/test/PCH/pragma-cuda-force-host-device.cu b/test/PCH/pragma-cuda-force-host-device.cu new file mode 100644 index 0000000000..dc006be960 --- /dev/null +++ b/test/PCH/pragma-cuda-force-host-device.cu @@ -0,0 +1,27 @@ +// RUN: %clang_cc1 -emit-pch %s -o %t +// RUN: %clang_cc1 -verify -verify-ignore-unexpected=note -include-pch %t -S -o /dev/null %s + +#ifndef HEADER +#define HEADER + +#pragma clang force_cuda_host_device begin +#pragma clang force_cuda_host_device begin +#pragma clang force_cuda_host_device end + +void hd1() {} + +#else + +void hd2() {} + +#pragma clang force_cuda_host_device end + +void host_only() {} + +__attribute__((device)) void device() { + hd1(); + hd2(); + host_only(); // expected-error {{no matching function for call}} +} + +#endif diff --git a/test/Parser/cuda-force-host-device-templates.cu b/test/Parser/cuda-force-host-device-templates.cu new file mode 100644 index 0000000000..68ec9c8e67 --- /dev/null +++ b/test/Parser/cuda-force-host-device-templates.cu @@ -0,0 +1,41 @@ +// RUN: %clang_cc1 -std=c++14 -S -verify -fcuda-is-device %s -o /dev/null + +// Check how the force_cuda_host_device pragma interacts with template +// instantiations. The errors here are emitted at codegen, so we can't do +// -fsyntax-only. + +template +auto foo() { // expected-note {{declared here}} + return T(); +} + +template +struct X { + void foo(); // expected-note {{declared here}} +}; + +#pragma clang force_cuda_host_device begin +__attribute__((host)) __attribute__((device)) void test() { + int n = foo(); // expected-error {{reference to __host__ function 'foo'}} + X().foo(); // expected-error {{reference to __host__ function 'foo'}} +} +#pragma clang force_cuda_host_device end + +// Same thing as above, but within a force_cuda_host_device block without a +// corresponding end. + +template +T bar() { // expected-note {{declared here}} + return T(); +} + +template +struct Y { + void bar(); // expected-note {{declared here}} +}; + +#pragma clang force_cuda_host_device begin +__attribute__((host)) __attribute__((device)) void test2() { + int n = bar(); // expected-error {{reference to __host__ function 'bar'}} + Y().bar(); // expected-error {{reference to __host__ function 'bar'}} +} diff --git a/test/Parser/cuda-force-host-device.cu b/test/Parser/cuda-force-host-device.cu new file mode 100644 index 0000000000..6064708aaa --- /dev/null +++ b/test/Parser/cuda-force-host-device.cu @@ -0,0 +1,36 @@ +// RUN: %clang_cc1 -fsyntax-only -verify %s + +// Check the force_cuda_host_device pragma. + +#pragma clang force_cuda_host_device begin +void f(); +#pragma clang force_cuda_host_device begin +void g(); +#pragma clang force_cuda_host_device end +void h(); +#pragma clang force_cuda_host_device end + +void i(); // expected-note {{not viable}} + +void host() { + f(); + g(); + h(); + i(); +} + +__attribute__((device)) void device() { + f(); + g(); + h(); + i(); // expected-error {{no matching function}} +} + +#pragma clang force_cuda_host_device foo +// expected-warning@-1 {{incorrect use of #pragma clang force_cuda_host_device begin|end}} + +#pragma clang force_cuda_host_device +// expected-warning@-1 {{incorrect use of #pragma clang force_cuda_host_device begin|end}} + +#pragma clang force_cuda_host_device begin foo +// expected-warning@-1 {{incorrect use of #pragma clang force_cuda_host_device begin|end}} -- 2.40.0