From: Jordan Rose Date: Wed, 30 Jan 2013 01:52:57 +0000 (+0000) Subject: Don't warn about Unicode characters in -E mode. X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=74c2498bb9e339345ee32bdd095e76157cec3b86;p=clang Don't warn about Unicode characters in -E mode. People use the C preprocessor for things other than C files. Some of them have Unicode characters. We shouldn't warn about Unicode characters appearing outside of identifiers in this case. There's not currently a way for the preprocessor to tell if it's in -E mode, so I added a new flag, derived from the PreprocessorOutputOptions. This is only used by the Unicode warnings for now, but could conceivably be used by other warnings or even behavioral differences later. git-svn-id: https://llvm.org/svn/llvm-project/cfe/trunk@173881 91177308-0d34-0410-b5e6-96231b3b80d8 --- diff --git a/include/clang/Frontend/PreprocessorOutputOptions.h b/include/clang/Frontend/PreprocessorOutputOptions.h index 9793aa6fa7..e273dd613d 100644 --- a/include/clang/Frontend/PreprocessorOutputOptions.h +++ b/include/clang/Frontend/PreprocessorOutputOptions.h @@ -25,7 +25,7 @@ public: public: PreprocessorOutputOptions() { - ShowCPP = 1; + ShowCPP = 0; ShowComments = 0; ShowLineMarkers = 1; ShowMacroComments = 0; diff --git a/include/clang/Lex/Preprocessor.h b/include/clang/Lex/Preprocessor.h index 24b6a18881..fc092e14e9 100644 --- a/include/clang/Lex/Preprocessor.h +++ b/include/clang/Lex/Preprocessor.h @@ -160,6 +160,9 @@ class Preprocessor : public RefCountedBase { /// \brief True if pragmas are enabled. bool PragmasEnabled : 1; + /// \brief True if the current build action is a preprocessing action. + bool PreprocessedOutput : 1; + /// \brief True if we are currently preprocessing a #if or #elif directive bool ParsingIfOrElifDirective; @@ -474,6 +477,16 @@ public: return SuppressIncludeNotFoundError; } + /// Sets whether the preprocessor is responsible for producing output or if + /// it is producing tokens to be consumed by Parse and Sema. + void setPreprocessedOutput(bool IsPreprocessedOutput) { + PreprocessedOutput = IsPreprocessedOutput; + } + + /// Returns true if the preprocessor is responsible for generating output, + /// false if it is producing tokens to be consumed by Parse and Sema. + bool isPreprocessedOutput() const { return PreprocessedOutput; } + /// isCurrentLexer - Return true if we are lexing directly from the specified /// lexer. bool isCurrentLexer(const PreprocessorLexer *L) const { diff --git a/lib/Frontend/CompilerInstance.cpp b/lib/Frontend/CompilerInstance.cpp index a7f0770ec7..27f96b0974 100644 --- a/lib/Frontend/CompilerInstance.cpp +++ b/lib/Frontend/CompilerInstance.cpp @@ -243,6 +243,8 @@ void CompilerInstance::createPreprocessor() { InitializePreprocessor(*PP, PPOpts, getHeaderSearchOpts(), getFrontendOpts()); + PP->setPreprocessedOutput(getPreprocessorOutputOpts().ShowCPP); + // Set up the module path, including the hash for the // module-creation options. SmallString<256> SpecificModuleCache( diff --git a/lib/Frontend/CompilerInvocation.cpp b/lib/Frontend/CompilerInvocation.cpp index f49f30d878..b4b0ddb1c4 100644 --- a/lib/Frontend/CompilerInvocation.cpp +++ b/lib/Frontend/CompilerInvocation.cpp @@ -1395,9 +1395,48 @@ static void ParsePreprocessorArgs(PreprocessorOptions &Opts, ArgList &Args, } static void ParsePreprocessorOutputArgs(PreprocessorOutputOptions &Opts, - ArgList &Args) { + ArgList &Args, + frontend::ActionKind Action) { using namespace options; - Opts.ShowCPP = !Args.hasArg(OPT_dM); + + switch (Action) { + case frontend::ASTDeclList: + case frontend::ASTDump: + case frontend::ASTDumpXML: + case frontend::ASTPrint: + case frontend::ASTView: + case frontend::EmitAssembly: + case frontend::EmitBC: + case frontend::EmitHTML: + case frontend::EmitLLVM: + case frontend::EmitLLVMOnly: + case frontend::EmitCodeGenOnly: + case frontend::EmitObj: + case frontend::FixIt: + case frontend::GenerateModule: + case frontend::GeneratePCH: + case frontend::GeneratePTH: + case frontend::ParseSyntaxOnly: + case frontend::PluginAction: + case frontend::PrintDeclContext: + case frontend::RewriteObjC: + case frontend::RewriteTest: + case frontend::RunAnalysis: + case frontend::MigrateSource: + Opts.ShowCPP = 0; + break; + + case frontend::DumpRawTokens: + case frontend::DumpTokens: + case frontend::InitOnly: + case frontend::PrintPreamble: + case frontend::PrintPreprocessedInput: + case frontend::RewriteMacros: + case frontend::RunPreprocessorOnly: + Opts.ShowCPP = !Args.hasArg(OPT_dM); + break; + } + Opts.ShowComments = Args.hasArg(OPT_C); Opts.ShowLineMarkers = !Args.hasArg(OPT_P); Opts.ShowMacroComments = Args.hasArg(OPT_CC); @@ -1478,7 +1517,8 @@ bool CompilerInvocation::CreateFromArgs(CompilerInvocation &Res, // parameters from the function and the "FileManager.h" #include. FileManager FileMgr(Res.getFileSystemOpts()); ParsePreprocessorArgs(Res.getPreprocessorOpts(), *Args, FileMgr, Diags); - ParsePreprocessorOutputArgs(Res.getPreprocessorOutputOpts(), *Args); + ParsePreprocessorOutputArgs(Res.getPreprocessorOutputOpts(), *Args, + Res.getFrontendOpts().ProgramAction); ParseTargetArgs(Res.getTargetOpts(), *Args); return Success; diff --git a/lib/Lex/Lexer.cpp b/lib/Lex/Lexer.cpp index 3e3aaae5f5..08f406b069 100644 --- a/lib/Lex/Lexer.cpp +++ b/lib/Lex/Lexer.cpp @@ -2811,14 +2811,13 @@ static bool isUnicodeWhitespace(uint32_t C) { } void Lexer::LexUnicode(Token &Result, uint32_t C, const char *CurPtr) { - if (isUnicodeWhitespace(C)) { - if (!isLexingRawMode()) { - CharSourceRange CharRange = - CharSourceRange::getCharRange(getSourceLocation(), - getSourceLocation(CurPtr)); - Diag(BufferPtr, diag::ext_unicode_whitespace) - << CharRange; - } + if (!isLexingRawMode() && !PP->isPreprocessedOutput() && + isUnicodeWhitespace(C)) { + CharSourceRange CharRange = + CharSourceRange::getCharRange(getSourceLocation(), + getSourceLocation(CurPtr)); + Diag(BufferPtr, diag::ext_unicode_whitespace) + << CharRange; Result.setFlag(Token::LeadingSpace); if (SkipWhitespace(Result, CurPtr)) @@ -2832,7 +2831,8 @@ void Lexer::LexUnicode(Token &Result, uint32_t C, const char *CurPtr) { return LexIdentifier(Result, CurPtr); } - if (!isASCII(*BufferPtr) && !isAllowedIDChar(C)) { + if (!isLexingRawMode() && !PP->isPreprocessedOutput() && + !isASCII(*BufferPtr) && !isAllowedIDChar(C)) { // Non-ASCII characters tend to creep into source code unintentionally. // Instead of letting the parser complain about the unknown token, // just drop the character. @@ -2842,13 +2842,11 @@ void Lexer::LexUnicode(Token &Result, uint32_t C, const char *CurPtr) { // loophole in the mapping of Unicode characters to basic character set // characters that allows us to map these particular characters to, say, // whitespace. - if (!isLexingRawMode()) { - CharSourceRange CharRange = - CharSourceRange::getCharRange(getSourceLocation(), - getSourceLocation(CurPtr)); - Diag(BufferPtr, diag::err_non_ascii) - << FixItHint::CreateRemoval(CharRange); - } + CharSourceRange CharRange = + CharSourceRange::getCharRange(getSourceLocation(), + getSourceLocation(CurPtr)); + Diag(BufferPtr, diag::err_non_ascii) + << FixItHint::CreateRemoval(CharRange); BufferPtr = CurPtr; return LexTokenInternal(Result); @@ -3537,11 +3535,15 @@ LexNextToken: if (Status == conversionOK) return LexUnicode(Result, CodePoint, CurPtr); + if (isLexingRawMode() || PP->isPreprocessedOutput()) { + Kind = tok::unknown; + break; + } + // Non-ASCII characters tend to creep into source code unintentionally. // Instead of letting the parser complain about the unknown token, // just diagnose the invalid UTF-8, then drop the character. - if (!isLexingRawMode()) - Diag(CurPtr, diag::err_invalid_utf8); + Diag(CurPtr, diag::err_invalid_utf8); BufferPtr = CurPtr+1; goto LexNextToken; diff --git a/test/Lexer/unicode.c b/test/Lexer/unicode.c index 1d7b53e2c5..26e77f61fd 100644 --- a/test/Lexer/unicode.c +++ b/test/Lexer/unicode.c @@ -1,6 +1,15 @@ // RUN: %clang_cc1 -fsyntax-only -verify %s +// RUN: %clang_cc1 -E -DPP_ONLY=1 %s -o %t +// RUN: FileCheck --strict-whitespace --input-file=%t %s // This file contains Unicode characters; please do not "fix" them! extern int x; // expected-warning {{treating Unicode character as whitespace}} extern int x; // expected-warning {{treating Unicode character as whitespace}} + +// CHECK: extern int {{x}} +// CHECK: extern int {{x}} + +#if PP_ONLY +CHECK: The preprocessor should not complain about Unicode characters like ©. +#endif