From: Argyrios Kyrtzidis
Date: Sun, 10 Aug 2008 13:15:22 +0000 (+0000)
Subject: Allow the preprocessor to cache the lexed tokens, so that we can do efficient lookahe...
X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=03db1b31dd926409b7defc1c90b66549464652c0;p=clang

Allow the preprocessor to cache the lexed tokens, so that we can do
efficient lookahead and backtracking.

1) New public methods added:
  -EnableBacktrackAtThisPos
  -DisableBacktrack
  -Backtrack
  -isBacktrackEnabled

2) LookAhead() implementation is replaced with a more efficient one.

3) LookNext() is removed.

git-svn-id: https://llvm.org/svn/llvm-project/cfe/trunk@54611 91177308-0d34-0410-b5e6-96231b3b80d8
---

diff --git a/include/clang/Lex/Preprocessor.h b/include/clang/Lex/Preprocessor.h
index 069c303f0b..772ce228a4 100644
--- a/include/clang/Lex/Preprocessor.h
+++ b/include/clang/Lex/Preprocessor.h
@@ -71,6 +71,9 @@ class Preprocessor {
   bool DisableMacroExpansion : 1;  // True if macro expansion is disabled.
   bool InMacroArgs : 1;            // True if parsing fn macro invocation args.
 
+  /// CacheTokens - True when the lexed tokens are cached for backtracking.
+  bool CacheTokens : 1;
+
   /// Identifiers - This is mapping/lookup information for all identifiers in
   /// the program, including program keywords.
   IdentifierTable Identifiers;
@@ -139,10 +142,24 @@ class Preprocessor {
   unsigned NumCachedTokenLexers;
   TokenLexer *TokenLexerCache[TokenLexerCacheSize];
 
-  /// PeekedToken - Cache the token that was retrieved through LookNext().
-  /// This is a valid token (its Location is valid) when LookNext() is
-  /// called and gets invalid again when it is "consumed" by Lex().
-  Token PeekedToken;
+  // Cached tokens state.
+
+  typedef std::vector<Token> CachedTokensTy;
+
+  /// CachedTokens - Cached tokens are stored here when we do backtracking or
+  /// lookahead. They are "lexed" by the CachingLex() method.
+  CachedTokensTy CachedTokens;
+
+  /// CachedLexPos - The position of the cached token that CachingLex() should
+  /// "lex" next. If it points beyond the CachedTokens vector, it means that
+  /// a normal Lex() should be invoked.
+  CachedTokensTy::size_type CachedLexPos;
+
+  /// CachedBacktrackPos - Gets set by the EnableBacktrackAtThisPos() method,
+  /// to indicate the position where CachedLexPos should be set when the
+  /// BackTrack() method is invoked.
+  CachedTokensTy::size_type CachedBacktrackPos;
+
 public:
   Preprocessor(Diagnostic &diags, const LangOptions &opts, TargetInfo &target,
                SourceManager &SM, HeaderSearch &Headers);
@@ -258,7 +275,45 @@ public:
   /// lexer stack. This should only be used in situations where the current
   /// state of the top-of-stack lexer is known.
   void RemoveTopOfLexerStack();
-
+
+  /// EnableBacktrackAtThisPos - From the point that this method is called, and
+  /// until DisableBacktrack() or Backtrack() is called, the Preprocessor keeps
+  /// track of the lexed tokens so that a subsequent Backtrack() call will make
+  /// the Preprocessor re-lex the same tokens.
+  ///
+  /// EnableBacktrackAtThisPos should not be called again until DisableBacktrack
+  /// or Backtrack is called.
+  ///
+  /// NOTE: *DO NOT* forget to call either DisableBacktrack() or Backtrack() at
+  /// some point after EnableBacktrackAtThisPos. If you don't, caching of tokens
+  /// will continue indefinitely.
+  ///
+  void EnableBacktrackAtThisPos() {
+    assert(!CacheTokens && "Backtrack is already enabled!");
+    CacheTokens = true;
+    CachedBacktrackPos = CachedLexPos;
+    EnterCachingLexMode();
+  }
+
+  /// DisableBacktrack - Stop the caching of tokens that was enabled by
+  /// EnableBacktrackAtThisPos().
+  void DisableBacktrack() {
+    assert(CacheTokens && "Backtrack is not enabled!");
+    CacheTokens = false;
+  }
+
+  /// Backtrack - Make Preprocessor re-lex the tokens that were lexed since
+  /// EnableBacktrackAtThisPos() was previously called.
+  void Backtrack() {
+    assert(CacheTokens && "Backtrack is not enabled!");
+    CacheTokens = false;
+    CachedLexPos = CachedBacktrackPos;
+  }
+
+  /// isBacktrackEnabled - True if EnableBacktrackAtThisPos() was called and
+  /// caching of tokens is on.
+  bool isBacktrackEnabled() const { return CacheTokens; }
+
   /// Lex - To lex a token from the preprocessor, just pull a token from the
   /// current lexer or macro object.
   void Lex(Token &Result) {
@@ -266,11 +321,8 @@ public:
       CurLexer->Lex(Result);
     else if (CurTokenLexer)
       CurTokenLexer->Lex(Result);
-    else {
-      // We have a peeked token that hasn't been consumed yet.
-      Result = PeekedToken;
-      ConsumedPeekedToken();
-    }
+    else
+      CachingLex(Result);
   }
   /// LexNonComment - Lex a token. If it's a comment, keep lexing until we get
   /// something not a comment.
@@ -300,32 +352,12 @@ public:
   /// returned by Lex(), LookAhead(1) returns the token after it, etc. This
   /// returns normal tokens after phase 5. As such, it is equivalent to using
   /// 'Lex', not 'LexUnexpandedToken'.
-  ///
-  /// NOTE: is a relatively expensive method, so it should not be used in common
-  /// code paths if possible!
-  ///
-  Token LookAhead(unsigned N);
-
-  /// LookNext - Returns the next token that would be returned by Lex() without
-  /// consuming it.
-  const Token &LookNext() {
-    if (PeekedToken.getLocation().isInvalid()) {
-      // We don't have a peeked token that hasn't been consumed yet.
-      // Peek it now.
-      PeekToken();
-    }
-    return PeekedToken;
+  const Token &LookAhead(unsigned N) {
+    if (CachedLexPos + N < CachedTokens.size())
+      return CachedTokens[CachedLexPos+N];
+    else
+      return PeekAhead(N+1);
   }
-
-private:
-  /// PeekToken - Lexes one token into PeekedToken and pushes CurLexer,
-  /// CurLexerToken into the IncludeMacroStack before setting them to null.
-  void PeekToken();
-
-  /// ConsumedPeekedToken - Called when Lex() is about to return the PeekedToken
-  /// and have it "consumed".
-  void ConsumedPeekedToken();
-public:
 
   /// Diag - Forwarding function for diagnostics. This emits a diagnostic at
   /// the specified Token's location, translating the token's start
@@ -523,6 +555,17 @@ private:
                               bool isAngled, const DirectoryLookup *FromDir,
                               const DirectoryLookup *&CurDir);
 
+  //===--------------------------------------------------------------------===//
+  // Caching stuff.
+  void CachingLex(Token &Result);
+  bool InCachingLexMode() const { return CurLexer == 0 && CurTokenLexer == 0; }
+  void EnterCachingLexMode();
+  void ExitCachingLexMode() {
+    if (InCachingLexMode())
+      RemoveTopOfLexerStack();
+  }
+  const Token &PeekAhead(unsigned N);
+
   //===--------------------------------------------------------------------===//
   /// Handle*Directive - implement the various preprocessor directives. These
   /// should side-effect the current preprocessor object so that the next call
diff --git a/include/clang/Parse/Parser.h b/include/clang/Parse/Parser.h
index 12efe648c7..cd63b9951c 100644
--- a/include/clang/Parse/Parser.h
+++ b/include/clang/Parse/Parser.h
@@ -198,10 +198,7 @@ private:
   /// Note that this differs from the Preprocessor's LookAhead method, because
   /// the Parser always has one token lexed that the preprocessor doesn't.
   ///
-  /// NOTE: is a relatively expensive method, so it should not be used in common
-  /// code paths if possible!
-  ///
-  Token GetLookAheadToken(unsigned N) {
+  const Token &GetLookAheadToken(unsigned N) {
     if (N == 0 || Tok.is(tok::eof)) return Tok;
     return PP.LookAhead(N-1);
   }
@@ -209,7 +206,7 @@ private:
   /// NextToken - This peeks ahead one token and returns it without
   /// consuming it.
   const Token &NextToken() {
-    return PP.LookNext();
+    return PP.LookAhead(0);
   }
 
diff --git a/lib/Lex/PPCaching.cpp b/lib/Lex/PPCaching.cpp
new file mode 100644
index 0000000000..794e9c4e76
--- /dev/null
+++ b/lib/Lex/PPCaching.cpp
@@ -0,0 +1,63 @@
+//===--- PPCaching.cpp - Handle caching lexed tokens ----------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements pieces of the Preprocessor interface that manage the
+// caching of lexed tokens.
+//
+//===----------------------------------------------------------------------===//
+
+#include "clang/Lex/Preprocessor.h"
+using namespace clang;
+
+void Preprocessor::CachingLex(Token &Result) {
+  if (CachedLexPos < CachedTokens.size()) {
+    Result = CachedTokens[CachedLexPos++];
+    return;
+  }
+
+  ExitCachingLexMode();
+  Lex(Result);
+
+  if (!CacheTokens) {
+    // All cached tokens were consumed.
+    CachedTokens.clear();
+    CachedLexPos = 0;
+    return;
+  }
+
+  // We should cache the lexed token.
+
+  EnterCachingLexMode();
+  if (Result.isNot(tok::eof)) {
+    CachedTokens.push_back(Result);
+    ++CachedLexPos;
+  }
+}
+
+void Preprocessor::EnterCachingLexMode() {
+  if (InCachingLexMode())
+    return;
+
+  IncludeMacroStack.push_back(IncludeStackInfo(CurLexer, CurDirLookup,
+                                               CurTokenLexer));
+  CurLexer = 0;
+  CurTokenLexer = 0;
+}
+
+
+const Token &Preprocessor::PeekAhead(unsigned N) {
+  assert(CachedLexPos + N > CachedTokens.size() && "Confused caching.");
+  ExitCachingLexMode();
+  for (unsigned C = CachedLexPos + N - CachedTokens.size(); C > 0; --C) {
+    CachedTokens.push_back(Token());
+    Lex(CachedTokens.back());
+  }
+  EnterCachingLexMode();
+  return CachedTokens.back();
+}
diff --git a/lib/Lex/PPLexerChange.cpp b/lib/Lex/PPLexerChange.cpp
index 1bedd5eded..a14cbed03a 100644
--- a/lib/Lex/PPLexerChange.cpp
+++ b/lib/Lex/PPLexerChange.cpp
@@ -60,94 +60,6 @@ Lexer *Preprocessor::getCurrentFileLexer() const {
   return 0;
 }
 
-/// LookAhead - This peeks ahead N tokens and returns that token without
-/// consuming any tokens. LookAhead(0) returns 'Tok', LookAhead(1) returns
-/// the token after Tok, etc. This
-///
-/// NOTE: is a relatively expensive method, so it should not be used in common
-/// code paths if possible!
-///
-Token Preprocessor::LookAhead(unsigned N) {
-  // FIXME: Optimize the case where multiple lookahead calls are used back to
-  // back. Consider if the the parser contained (dynamically):
-  //    Lookahead(1); Lookahead(1); Lookahead(1)
-  // This would return the same token 3 times, but would end up making lots of
-  // token stream lexers to do it. To handle this common case, see if the top
-  // of the lexer stack is a TokenStreamLexer with macro expansion disabled. If
-  // so, see if it has 'N' tokens available in it. If so, just return the
-  // token.
-
-  // FIXME: Optimize the case when the parser does multiple nearby lookahead
-  // calls. For example, consider:
-  //   Lookahead(0); Lookahead(1); Lookahead(2);
-  // The previous optimization won't apply, and there won't be any space left in
-  // the array that was previously new'd. To handle this, always round up the
-  // size we new to a multiple of 16 tokens. If the previous buffer has space
-  // left, we can just grow it. This means we only have to do the new 1/16th as
-  // often.
-
-  // Optimized LookAhead(0) case.
-  if (N == 0)
-    return LookNext();
-
-  Token *LookaheadTokens = new Token[N+1];
-
-  // Read N+1 tokens into LookaheadTokens. After this loop, Tok is the token
-  // to return.
-  Token Tok;
-  unsigned NumTokens = 0;
-  for (; N != ~0U; --N, ++NumTokens) {
-    Lex(Tok);
-    LookaheadTokens[NumTokens] = Tok;
-
-    // If we got to EOF, don't lex past it. This will cause LookAhead to return
-    // the EOF token.
-    if (Tok.is(tok::eof))
-      break;
-  }
-
-  // Okay, at this point, we have the token we want to return in Tok. However,
-  // we read it and a bunch of other stuff (in LookaheadTokens) that we must
-  // allow subsequent calls to 'Lex' to return. To do this, we push a new token
-  // lexer onto the lexer stack with the tokens we read here. This passes
-  // ownership of LookaheadTokens to EnterTokenStream.
-  //
-  // Note that we disable macro expansion of the tokens from this buffer, since
-  // any macros have already been expanded, and the internal preprocessor state
-  // may already read past new macros. Consider something like LookAhead(1) on
-  //    X
-  //    #define X 14
-  //    Y
-  // The lookahead call should return 'Y', and the next Lex call should return
-  // 'X' even though X -> 14 has already been entered as a macro.
-  //
-  EnterTokenStream(LookaheadTokens, NumTokens, true /*DisableExpansion*/,
-                   true /*OwnsTokens*/);
-  return Tok;
-}
-
-/// PeekToken - Lexes one token into PeekedToken and pushes CurLexer,
-/// CurLexerToken into the IncludeMacroStack before setting them to null.
-void Preprocessor::PeekToken() {
-  Lex(PeekedToken);
-  // Cache the current Lexer, TokenLexer and set them both to null.
-  // When Lex() is called, PeekedToken will be "consumed".
-  IncludeMacroStack.push_back(IncludeStackInfo(CurLexer, CurDirLookup,
-                                               CurTokenLexer));
-  CurLexer = 0;
-  CurTokenLexer = 0;
-}
-
-/// ConsumedPeekedToken - Called when Lex() is about to return the PeekedToken
-/// and have it "consumed".
-void Preprocessor::ConsumedPeekedToken() {
-  assert(PeekedToken.getLocation().isValid() && "Confused Peeking?");
-  // Restore CurLexer, TokenLexer.
-  RemoveTopOfLexerStack();
-  // Make PeekedToken invalid.
-  PeekedToken.startToken();
-}
-
 //===----------------------------------------------------------------------===//
 // Methods for Entering and Callbacks for leaving various contexts
 
diff --git a/lib/Lex/Preprocessor.cpp b/lib/Lex/Preprocessor.cpp
index 631b8361e9..33c94b6e8a 100644
--- a/lib/Lex/Preprocessor.cpp
+++ b/lib/Lex/Preprocessor.cpp
@@ -68,6 +68,9 @@ Preprocessor::Preprocessor(Diagnostic &diags, const LangOptions &opts,
   InMacroArgs = false;
   NumCachedTokenLexers = 0;
 
+  CacheTokens = false;
+  CachedLexPos = 0;
+
   // "Poison" __VA_ARGS__, which can only appear in the expansion of a macro.
   // This gets unpoisoned where it is allowed.
   (Ident__VA_ARGS__ = getIdentifierInfo("__VA_ARGS__"))->setIsPoisoned();
@@ -579,4 +582,3 @@ void Preprocessor::HandleIdentifier(Token &Identifier) {
   if (II.isExtensionToken() && Features.C99)
     Diag(Identifier, diag::ext_token_used);
 }
-
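
A minimal usage sketch of the backtracking API introduced above (not part of the commit): it assumes a fully constructed clang::Preprocessor named PP, and the commit-or-rewind decision below is only a stand-in for whatever tentative-parse check a real client would perform. It uses only methods that exist after this patch: LookAhead(), EnableBacktrackAtThisPos(), Lex(), DisableBacktrack(), Backtrack() and isBacktrackEnabled().

#include <cassert>
#include "clang/Lex/Preprocessor.h"
using namespace clang;

// Tentatively consume two tokens; if the speculative check fails, rewind so
// the same tokens will be handed out by PP.Lex() again.
bool TryConsumeTwoTokens(Preprocessor &PP) {
  // Peeking never consumes anything: with this commit the peeked tokens
  // simply land in the Preprocessor's CachedTokens vector.
  if (PP.LookAhead(1).is(tok::eof))
    return false;

  PP.EnableBacktrackAtThisPos();   // start recording lexed tokens here

  Token Tok1, Tok2;
  PP.Lex(Tok1);                    // consumed tokens go into the cache
  PP.Lex(Tok2);

  // Stand-in predicate; a real caller would inspect the tokens it parsed.
  bool Commit = Tok1.isNot(tok::eof) && Tok2.isNot(tok::eof);

  if (Commit)
    PP.DisableBacktrack();         // keep the tokens we consumed
  else
    PP.Backtrack();                // re-lex Tok1 and Tok2 on the next Lex()

  assert(!PP.isBacktrackEnabled() && "caching must always be closed out");
  return Commit;
}

As the NOTE in Preprocessor.h stresses, every EnableBacktrackAtThisPos() must eventually be paired with either DisableBacktrack() or Backtrack(); otherwise caching of tokens continues indefinitely.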