From: Douglas Gregor Date: Tue, 20 Jul 2010 20:18:03 +0000 (+0000) Subject: Introduce a new lexer function to compute the "preamble" of a file, X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=f033f1da4a34f8df6e95e9929dc04ff54bb8fb01;p=clang Introduce a new lexer function to compute the "preamble" of a file, which is the part of the file that contains all of the initial comments, includes, and preprocessor directives that occur before any of the actual code. Added a new -print-preamble cc1 action that is only used for testing. git-svn-id: https://llvm.org/svn/llvm-project/cfe/trunk@108913 91177308-0d34-0410-b5e6-96231b3b80d8 --- diff --git a/include/clang/Driver/CC1Options.td b/include/clang/Driver/CC1Options.td index 4502401c16..72dca18fcd 100644 --- a/include/clang/Driver/CC1Options.td +++ b/include/clang/Driver/CC1Options.td @@ -304,6 +304,9 @@ def fixit_EQ : Joined<"-fixit=">, HelpText<"Apply fix-it advice creating a file with the given suffix">; def parse_print_callbacks : Flag<"-parse-print-callbacks">, HelpText<"Run parser and print each callback invoked">; +def print_preamble : Flag<"-print-preamble">, + HelpText<"Print the \"preamble\" of a file, which is a candidate for implicit" + " precompiled headers.">; def emit_html : Flag<"-emit-html">, HelpText<"Output input source as HTML">; def ast_print : Flag<"-ast-print">, diff --git a/include/clang/Frontend/FrontendActions.h b/include/clang/Frontend/FrontendActions.h index 26262cfa95..c172066e03 100644 --- a/include/clang/Frontend/FrontendActions.h +++ b/include/clang/Frontend/FrontendActions.h @@ -134,6 +134,16 @@ public: virtual bool hasCodeCompletionSupport() const; }; +class PrintPreambleAction : public FrontendAction { +protected: + void ExecuteAction(); + virtual ASTConsumer *CreateASTConsumer(CompilerInstance &, llvm::StringRef) { + return 0; + } + + virtual bool usesPreprocessorOnly() const { return true; } +}; + 
//===----------------------------------------------------------------------===// // Preprocessor Actions //===----------------------------------------------------------------------===// @@ -174,7 +184,7 @@ protected: virtual bool hasPCHSupport() const { return true; } }; - + } // end namespace clang #endif diff --git a/include/clang/Frontend/FrontendOptions.h b/include/clang/Frontend/FrontendOptions.h index 4010ea6dd7..263e571af4 100644 --- a/include/clang/Frontend/FrontendOptions.h +++ b/include/clang/Frontend/FrontendOptions.h @@ -44,6 +44,7 @@ namespace frontend { ParseSyntaxOnly, ///< Parse and perform semantic analysis. PluginAction, ///< Run a plugin action, \see ActionName. PrintDeclContext, ///< Print DeclContext and their Decls. + PrintPreamble, ///< Print the "preamble" of the input file PrintPreprocessedInput, ///< -E mode. RewriteMacros, ///< Expand macros but not #includes. RewriteObjC, ///< ObjC->C Rewriter. diff --git a/include/clang/Lex/Lexer.h b/include/clang/Lex/Lexer.h index 6a6e319463..96d7605e4d 100644 --- a/include/clang/Lex/Lexer.h +++ b/include/clang/Lex/Lexer.h @@ -219,6 +219,19 @@ public: const SourceManager &SM, const LangOptions &LangOpts); + /// \brief Compute the preamble of the given file. + /// + /// The preamble of a file contains the initial comments, include directives, + /// and other preprocessor directives that occur before the code in this + /// particular file actually begins. The preamble of the main source file is + /// a potential prefix header. + /// + /// \param Buffer The memory buffer containing the file's contents. + /// + /// \returns The offset into the file where the preamble ends and the rest + /// of the file begins. + static unsigned ComputePreamble(const llvm::MemoryBuffer *Buffer); + //===--------------------------------------------------------------------===// // Internal implementation interfaces. 
private: diff --git a/lib/Frontend/CompilerInvocation.cpp b/lib/Frontend/CompilerInvocation.cpp index 00363d91fd..b007ac4bd3 100644 --- a/lib/Frontend/CompilerInvocation.cpp +++ b/lib/Frontend/CompilerInvocation.cpp @@ -331,6 +331,7 @@ static const char *getActionName(frontend::ActionKind Kind) { case frontend::ParsePrintCallbacks: return "-parse-print-callbacks"; case frontend::ParseSyntaxOnly: return "-fsyntax-only"; case frontend::PrintDeclContext: return "-print-decl-contexts"; + case frontend::PrintPreamble: return "-print-preamble"; case frontend::PrintPreprocessedInput: return "-E"; case frontend::RewriteMacros: return "-rewrite-macros"; case frontend::RewriteObjC: return "-rewrite-objc"; @@ -989,6 +990,8 @@ static InputKind ParseFrontendArgs(FrontendOptions &Opts, ArgList &Args, Opts.ProgramAction = frontend::ParseSyntaxOnly; break; case OPT_print_decl_contexts: Opts.ProgramAction = frontend::PrintDeclContext; break; + case OPT_print_preamble: + Opts.ProgramAction = frontend::PrintPreamble; break; case OPT_E: Opts.ProgramAction = frontend::PrintPreprocessedInput; break; case OPT_rewrite_macros: diff --git a/lib/Frontend/FrontendActions.cpp b/lib/Frontend/FrontendActions.cpp index 3a53dee806..4db9c11ee3 100644 --- a/lib/Frontend/FrontendActions.cpp +++ b/lib/Frontend/FrontendActions.cpp @@ -19,6 +19,7 @@ #include "clang/Frontend/FrontendDiagnostic.h" #include "clang/Frontend/Utils.h" #include "llvm/ADT/OwningPtr.h" +#include "llvm/Support/MemoryBuffer.h" #include "llvm/Support/raw_ostream.h" using namespace clang; @@ -192,3 +193,32 @@ void PrintPreprocessedAction::ExecuteAction() { DoPrintPreprocessedInput(CI.getPreprocessor(), OS, CI.getPreprocessorOutputOpts()); } + +void PrintPreambleAction::ExecuteAction() { + switch (getCurrentFileKind()) { + case IK_C: + case IK_CXX: + case IK_ObjC: + case IK_ObjCXX: + case IK_OpenCL: + break; + + case IK_None: + case IK_Asm: + case IK_PreprocessedC: + case IK_PreprocessedCXX: + case IK_PreprocessedObjC: + case 
IK_PreprocessedObjCXX: + case IK_AST: + case IK_LLVM_IR: + // We can't do anything with these. + return; + } + + llvm::MemoryBuffer *Buffer = llvm::MemoryBuffer::getFile(getCurrentFile()); + if (Buffer) { + unsigned Preamble = Lexer::ComputePreamble(Buffer); + llvm::outs().write(Buffer->getBufferStart(), Preamble); + delete Buffer; + } +} diff --git a/lib/Lex/Lexer.cpp b/lib/Lex/Lexer.cpp index 91b14f638d..2f11c37e65 100644 --- a/lib/Lex/Lexer.cpp +++ b/lib/Lex/Lexer.cpp @@ -28,6 +28,7 @@ #include "clang/Lex/Preprocessor.h" #include "clang/Lex/LexDiagnostic.h" #include "clang/Basic/SourceManager.h" +#include "llvm/ADT/StringSwitch.h" #include "llvm/Support/Compiler.h" #include "llvm/Support/MemoryBuffer.h" #include <cctype> @@ -247,6 +248,130 @@ unsigned Lexer::MeasureTokenLength(SourceLocation Loc, return TheTok.getLength(); } +namespace { + enum PreambleDirectiveKind { + PDK_Skipped, + PDK_StartIf, + PDK_EndIf, + PDK_Unknown + }; +} + +unsigned Lexer::ComputePreamble(const llvm::MemoryBuffer *Buffer) { + // Create a lexer starting at the beginning of the file. Note that we use a + // "fake" file source location at offset 1 so that the lexer will track our + // position within the file. + const unsigned StartOffset = 1; + SourceLocation StartLoc = SourceLocation::getFromRawEncoding(StartOffset); + LangOptions LangOpts; + Lexer TheLexer(StartLoc, LangOpts, Buffer->getBufferStart(), + Buffer->getBufferStart(), Buffer->getBufferEnd()); + + bool InPreprocessorDirective = false; + Token TheTok; + Token IfStartTok; + unsigned IfCount = 0; + do { + TheLexer.LexFromRawLexer(TheTok); + + if (InPreprocessorDirective) { + // If we've hit the end of the file, we're done. + if (TheTok.getKind() == tok::eof) { + InPreprocessorDirective = false; + break; + } + + // If we haven't hit the end of the preprocessor directive, skip this + // token. 
+ if (!TheTok.isAtStartOfLine()) + continue; + + // We've passed the end of the preprocessor directive, and will look + // at this token again below. + InPreprocessorDirective = false; + } + + // Comments are okay; skip over them. + if (TheTok.getKind() == tok::comment) + continue; + + if (TheTok.isAtStartOfLine() && TheTok.getKind() == tok::hash) { + // This is the start of a preprocessor directive. + Token HashTok = TheTok; + InPreprocessorDirective = true; + + // Figure out which directive this is. Since we're lexing raw tokens, + // we don't have an identifier table available. Instead, just look at + // the raw identifier to recognize and categorize preprocessor directives. + TheLexer.LexFromRawLexer(TheTok); + if (TheTok.getKind() == tok::identifier && !TheTok.needsCleaning()) { + const char *IdStart = Buffer->getBufferStart() + + TheTok.getLocation().getRawEncoding() - 1; + llvm::StringRef Keyword(IdStart, TheTok.getLength()); + PreambleDirectiveKind PDK + = llvm::StringSwitch<PreambleDirectiveKind>(Keyword) + .Case("include", PDK_Skipped) + .Case("__include_macros", PDK_Skipped) + .Case("define", PDK_Skipped) + .Case("undef", PDK_Skipped) + .Case("line", PDK_Skipped) + .Case("error", PDK_Skipped) + .Case("pragma", PDK_Skipped) + .Case("import", PDK_Skipped) + .Case("include_next", PDK_Skipped) + .Case("warning", PDK_Skipped) + .Case("ident", PDK_Skipped) + .Case("sccs", PDK_Skipped) + .Case("assert", PDK_Skipped) + .Case("unassert", PDK_Skipped) + .Case("if", PDK_StartIf) + .Case("ifdef", PDK_StartIf) + .Case("ifndef", PDK_StartIf) + .Case("elif", PDK_Skipped) + .Case("else", PDK_Skipped) + .Case("endif", PDK_EndIf) + .Default(PDK_Unknown); + + switch (PDK) { + case PDK_Skipped: + continue; + + case PDK_StartIf: + if (IfCount == 0) + IfStartTok = HashTok; + + ++IfCount; + continue; + + case PDK_EndIf: + // Mismatched #endif. The preamble ends here. 
+ if (IfCount == 0) + break; + + --IfCount; + continue; + + case PDK_Unknown: + // We don't know what this directive is; stop at the '#'. + break; + } + } + + // We only end up here if we didn't recognize the preprocessor + // directive or it was one that can't occur in the preamble at this + // point. Roll back the current token to the location of the '#'. + InPreprocessorDirective = false; + TheTok = HashTok; + } + + // We hit a token + break; + } while (true); + + SourceLocation End = IfCount? IfStartTok.getLocation() : TheTok.getLocation(); + return End.getRawEncoding() - StartLoc.getRawEncoding(); +} + //===----------------------------------------------------------------------===// // Character information. //===----------------------------------------------------------------------===// diff --git a/test/Lexer/Inputs/preamble.txt b/test/Lexer/Inputs/preamble.txt new file mode 100644 index 0000000000..c5f7288fa5 --- /dev/null +++ b/test/Lexer/Inputs/preamble.txt @@ -0,0 +1,11 @@ +// Preamble detection test: see below for comments and test commands. + +#include <blah> +#ifndef FOO +#else +#ifdef BAR +#elif WIBBLE +#endif +#pragma unknown +#endif + diff --git a/test/Lexer/preamble.c b/test/Lexer/preamble.c new file mode 100644 index 0000000000..b1f2fadd1c --- /dev/null +++ b/test/Lexer/preamble.c @@ -0,0 +1,25 @@ +// Preamble detection test: see below for comments and test commands. + +#include <blah> +#ifndef FOO +#else +#ifdef BAR +#elif WIBBLE +#endif +#pragma unknown +#endif + +#ifdef WIBBLE +#include "honk" +#else +int foo(); +#endif + +// This test checks for detection of the preamble of a file, which +// includes all of the starting comments and #includes. Note that any +// changes to the preamble part of this file must be mirrored in +// Inputs/preamble.txt, since we diff against it. 
+ +// RUN: %clang_cc1 -print-preamble %s > %t +// RUN: diff %t %S/Inputs/preamble.txt + diff --git a/tools/driver/cc1_main.cpp b/tools/driver/cc1_main.cpp index 841e40abfc..3b13f0c736 100644 --- a/tools/driver/cc1_main.cpp +++ b/tools/driver/cc1_main.cpp @@ -102,6 +102,7 @@ static FrontendAction *CreateFrontendBaseAction(CompilerInstance &CI) { } case PrintDeclContext: return new DeclContextPrintAction(); + case PrintPreamble: return new PrintPreambleAction(); case PrintPreprocessedInput: return new PrintPreprocessedAction(); case RewriteMacros: return new RewriteMacrosAction(); case RewriteObjC: return new RewriteObjCAction();