From: Argyrios Kyrtzidis
Date: Sun, 10 Aug 2008 13:15:22 +0000 (+0000)
Subject: Allow the preprocessor to cache the lexed tokens, so that we can do efficient lookahe...
X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=03db1b31dd926409b7defc1c90b66549464652c0;p=clang

Allow the preprocessor to cache the lexed tokens, so that we can do
efficient lookahead and backtracking.

1) New public methods added:
  -EnableBacktrackAtThisPos
  -DisableBacktrack
  -Backtrack
  -isBacktrackEnabled

2) LookAhead() implementation is replaced with a more efficient one.

3) LookNext() is removed.

git-svn-id: https://llvm.org/svn/llvm-project/cfe/trunk@54611 91177308-0d34-0410-b5e6-96231b3b80d8
---

diff --git a/include/clang/Lex/Preprocessor.h b/include/clang/Lex/Preprocessor.h
index 069c303f0b..772ce228a4 100644
--- a/include/clang/Lex/Preprocessor.h
+++ b/include/clang/Lex/Preprocessor.h
@@ -71,6 +71,9 @@ class Preprocessor {
   bool DisableMacroExpansion : 1;  // True if macro expansion is disabled.
   bool InMacroArgs : 1;            // True if parsing fn macro invocation args.
 
+  /// CacheTokens - True when the lexed tokens are cached for backtracking.
+  bool CacheTokens : 1;
+
   /// Identifiers - This is mapping/lookup information for all identifiers in
   /// the program, including program keywords.
   IdentifierTable Identifiers;
@@ -139,10 +142,24 @@ class Preprocessor {
   unsigned NumCachedTokenLexers;
   TokenLexer *TokenLexerCache[TokenLexerCacheSize];
 
-  /// PeekedToken - Cache the token that was retrieved through LookNext().
-  /// This is a valid token (its Location is valid) when LookNext() is
-  /// called and gets invalid again when it is "consumed" by Lex().
-  Token PeekedToken;
+  // Cached tokens state.
+
+  typedef std::vector<Token> CachedTokensTy;
+
+  /// CachedTokens - Cached tokens are stored here when we do backtracking or
+  /// lookahead. They are "lexed" by the CachingLex() method.
+  CachedTokensTy CachedTokens;
+
+  /// CachedLexPos - The position of the cached token that CachingLex() should
+  /// "lex" next. If it points beyond the CachedTokens vector, it means that
+  /// a normal Lex() should be invoked.
+  CachedTokensTy::size_type CachedLexPos;
+
+  /// CachedBacktrackPos - Gets set by the EnableBacktrackAtThisPos() method,
+  /// to indicate the position where CachedLexPos should be set when the
+  /// BackTrack() method is invoked.
+  CachedTokensTy::size_type CachedBacktrackPos;
+
 public:
   Preprocessor(Diagnostic &diags, const LangOptions &opts, TargetInfo &target,
                SourceManager &SM, HeaderSearch &Headers);
@@ -258,7 +275,45 @@ public:
   /// lexer stack. This should only be used in situations where the current
   /// state of the top-of-stack lexer is known.
   void RemoveTopOfLexerStack();
-
+
+  /// EnableBacktrackAtThisPos - From the point that this method is called, and
+  /// until DisableBacktrack() or Backtrack() is called, the Preprocessor keeps
+  /// track of the lexed tokens so that a subsequent Backtrack() call will make
+  /// the Preprocessor re-lex the same tokens.
+  ///
+  /// EnableBacktrackAtThisPos should not be called again until DisableBacktrack
+  /// or Backtrack is called.
+  ///
+  /// NOTE: *DO NOT* forget to call either DisableBacktrack() or Backtrack() at
+  /// some point after EnableBacktrackAtThisPos. If you don't, caching of tokens
+  /// will continue indefinitely.
+  ///
+  void EnableBacktrackAtThisPos() {
+    assert(!CacheTokens && "Backtrack is already enabled!");
+    CacheTokens = true;
+    CachedBacktrackPos = CachedLexPos;
+    EnterCachingLexMode();
+  }
+
+  /// DisableBacktrack - Stop the caching of tokens that was enabled by
+  /// EnableBacktrackAtThisPos().
+  void DisableBacktrack() {
+    assert(CacheTokens && "Backtrack is not enabled!");
+    CacheTokens = false;
+  }
+
+  /// Backtrack - Make Preprocessor re-lex the tokens that were lexed since
+  /// EnableBacktrackAtThisPos() was previously called.
+  void Backtrack() {
+    assert(CacheTokens && "Backtrack is not enabled!");
+    CacheTokens = false;
+    CachedLexPos = CachedBacktrackPos;
+  }
+
+  /// isBacktrackEnabled - True if EnableBacktrackAtThisPos() was called and
+  /// caching of tokens is on.
+  bool isBacktrackEnabled() const { return CacheTokens; }
+
   /// Lex - To lex a token from the preprocessor, just pull a token from the
   /// current lexer or macro object.
   void Lex(Token &Result) {
@@ -266,11 +321,8 @@ public:
       CurLexer->Lex(Result);
     else if (CurTokenLexer)
       CurTokenLexer->Lex(Result);
-    else {
-      // We have a peeked token that hasn't been consumed yet.
-      Result = PeekedToken;
-      ConsumedPeekedToken();
-    }
+    else
+      CachingLex(Result);
   }
   /// LexNonComment - Lex a token. If it's a comment, keep lexing until we get
   /// something not a comment.
@@ -300,32 +352,12 @@ public:
   /// returned by Lex(), LookAhead(1) returns the token after it, etc. This
   /// returns normal tokens after phase 5. As such, it is equivalent to using
   /// 'Lex', not 'LexUnexpandedToken'.
-  ///
-  /// NOTE: is a relatively expensive method, so it should not be used in common
-  /// code paths if possible!
-  ///
-  Token LookAhead(unsigned N);
-
-  /// LookNext - Returns the next token that would be returned by Lex() without
-  /// consuming it.
-  const Token &LookNext() {
-    if (PeekedToken.getLocation().isInvalid()) {
-      // We don't have a peeked token that hasn't been consumed yet.
-      // Peek it now.
-      PeekToken();
-    }
-    return PeekedToken;
+  const Token &LookAhead(unsigned N) {
+    if (CachedLexPos + N < CachedTokens.size())
+      return CachedTokens[CachedLexPos+N];
+    else
+      return PeekAhead(N+1);
   }
-
-private:
-  /// PeekToken - Lexes one token into PeekedToken and pushes CurLexer,
-  /// CurLexerToken into the IncludeMacroStack before setting them to null.
-  void PeekToken();
-
-  /// ConsumedPeekedToken - Called when Lex() is about to return the PeekedToken
-  /// and have it "consumed".
-  void ConsumedPeekedToken();
-public:
 
   /// Diag - Forwarding function for diagnostics. This emits a diagnostic at
   /// the specified Token's location, translating the token's start
@@ -523,6 +555,17 @@ private:
                               bool isAngled, const DirectoryLookup *FromDir,
                               const DirectoryLookup *&CurDir);
 
+  //===--------------------------------------------------------------------===//
+  // Caching stuff.
+  void CachingLex(Token &Result);
+  bool InCachingLexMode() const { return CurLexer == 0 && CurTokenLexer == 0; }
+  void EnterCachingLexMode();
+  void ExitCachingLexMode() {
+    if (InCachingLexMode())
+      RemoveTopOfLexerStack();
+  }
+  const Token &PeekAhead(unsigned N);
+
   //===--------------------------------------------------------------------===//
   /// Handle*Directive - implement the various preprocessor directives. These
   /// should side-effect the current preprocessor object so that the next call
diff --git a/include/clang/Parse/Parser.h b/include/clang/Parse/Parser.h
index 12efe648c7..cd63b9951c 100644
--- a/include/clang/Parse/Parser.h
+++ b/include/clang/Parse/Parser.h
@@ -198,10 +198,7 @@ private:
   /// Note that this differs from the Preprocessor's LookAhead method, because
   /// the Parser always has one token lexed that the preprocessor doesn't.
   ///
-  /// NOTE: is a relatively expensive method, so it should not be used in common
-  /// code paths if possible!
-  ///
-  Token GetLookAheadToken(unsigned N) {
+  const Token &GetLookAheadToken(unsigned N) {
     if (N == 0 || Tok.is(tok::eof)) return Tok;
     return PP.LookAhead(N-1);
   }
@@ -209,7 +206,7 @@ private:
   /// NextToken - This peeks ahead one token and returns it without
   /// consuming it.
   const Token &NextToken() {
-    return PP.LookNext();
+    return PP.LookAhead(0);
   }
 
diff --git a/lib/Lex/PPCaching.cpp b/lib/Lex/PPCaching.cpp
new file mode 100644
index 0000000000..794e9c4e76
--- /dev/null
+++ b/lib/Lex/PPCaching.cpp
@@ -0,0 +1,63 @@
+//===--- PPCaching.cpp - Handle caching lexed tokens ----------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements pieces of the Preprocessor interface that manage the
+// caching of lexed tokens.
+//
+//===----------------------------------------------------------------------===//
+
+#include "clang/Lex/Preprocessor.h"
+using namespace clang;
+
+void Preprocessor::CachingLex(Token &Result) {
+  if (CachedLexPos < CachedTokens.size()) {
+    Result = CachedTokens[CachedLexPos++];
+    return;
+  }
+
+  ExitCachingLexMode();
+  Lex(Result);
+
+  if (!CacheTokens) {
+    // All cached tokens were consumed.
+    CachedTokens.clear();
+    CachedLexPos = 0;
+    return;
+  }
+
+  // We should cache the lexed token.
+
+  EnterCachingLexMode();
+  if (Result.isNot(tok::eof)) {
+    CachedTokens.push_back(Result);
+    ++CachedLexPos;
+  }
+}
+
+void Preprocessor::EnterCachingLexMode() {
+  if (InCachingLexMode())
+    return;
+
+  IncludeMacroStack.push_back(IncludeStackInfo(CurLexer, CurDirLookup,
+                                               CurTokenLexer));
+  CurLexer = 0;
+  CurTokenLexer = 0;
+}
+
+
+const Token &Preprocessor::PeekAhead(unsigned N) {
+  assert(CachedLexPos + N > CachedTokens.size() && "Confused caching.");
+  ExitCachingLexMode();
+  for (unsigned C = CachedLexPos + N - CachedTokens.size(); C > 0; --C) {
+    CachedTokens.push_back(Token());
+    Lex(CachedTokens.back());
+  }
+  EnterCachingLexMode();
+  return CachedTokens.back();
+}
diff --git a/lib/Lex/PPLexerChange.cpp b/lib/Lex/PPLexerChange.cpp
index 1bedd5eded..a14cbed03a 100644
--- a/lib/Lex/PPLexerChange.cpp
+++ b/lib/Lex/PPLexerChange.cpp
@@ -60,94 +60,6 @@ Lexer *Preprocessor::getCurrentFileLexer() const {
   return 0;
 }
 
-/// LookAhead - This peeks ahead N tokens and returns that token without
-/// consuming any tokens. LookAhead(0) returns 'Tok', LookAhead(1) returns
-/// the token after Tok, etc. This
-///
-/// NOTE: is a relatively expensive method, so it should not be used in common
-/// code paths if possible!
-///
-Token Preprocessor::LookAhead(unsigned N) {
-  // FIXME: Optimize the case where multiple lookahead calls are used back to
-  // back. Consider if the the parser contained (dynamically):
-  //    Lookahead(1); Lookahead(1); Lookahead(1)
-  // This would return the same token 3 times, but would end up making lots of
-  // token stream lexers to do it. To handle this common case, see if the top
-  // of the lexer stack is a TokenStreamLexer with macro expansion disabled. If
-  // so, see if it has 'N' tokens available in it. If so, just return the
-  // token.
-
-  // FIXME: Optimize the case when the parser does multiple nearby lookahead
-  // calls. For example, consider:
-  //   Lookahead(0); Lookahead(1); Lookahead(2);
-  // The previous optimization won't apply, and there won't be any space left in
-  // the array that was previously new'd. To handle this, always round up the
-  // size we new to a multiple of 16 tokens. If the previous buffer has space
-  // left, we can just grow it. This means we only have to do the new 1/16th as
-  // often.
-
-  // Optimized LookAhead(0) case.
-  if (N == 0)
-    return LookNext();
-
-  Token *LookaheadTokens = new Token[N+1];
-
-  // Read N+1 tokens into LookaheadTokens. After this loop, Tok is the token
-  // to return.
-  Token Tok;
-  unsigned NumTokens = 0;
-  for (; N != ~0U; --N, ++NumTokens) {
-    Lex(Tok);
-    LookaheadTokens[NumTokens] = Tok;
-
-    // If we got to EOF, don't lex past it. This will cause LookAhead to return
-    // the EOF token.
-    if (Tok.is(tok::eof))
-      break;
-  }
-
-  // Okay, at this point, we have the token we want to return in Tok. However,
-  // we read it and a bunch of other stuff (in LookaheadTokens) that we must
-  // allow subsequent calls to 'Lex' to return. To do this, we push a new token
-  // lexer onto the lexer stack with the tokens we read here. This passes
-  // ownership of LookaheadTokens to EnterTokenStream.
-  //
-  // Note that we disable macro expansion of the tokens from this buffer, since
-  // any macros have already been expanded, and the internal preprocessor state
-  // may already read past new macros. Consider something like LookAhead(1) on
-  //    X
-  //    #define X 14
-  //    Y
-  // The lookahead call should return 'Y', and the next Lex call should return
-  // 'X' even though X -> 14 has already been entered as a macro.
-  //
-  EnterTokenStream(LookaheadTokens, NumTokens, true /*DisableExpansion*/,
-                   true /*OwnsTokens*/);
-  return Tok;
-}
-
-/// PeekToken - Lexes one token into PeekedToken and pushes CurLexer,
-/// CurLexerToken into the IncludeMacroStack before setting them to null.
-void Preprocessor::PeekToken() {
-  Lex(PeekedToken);
-  // Cache the current Lexer, TokenLexer and set them both to null.
-  // When Lex() is called, PeekedToken will be "consumed".
-  IncludeMacroStack.push_back(IncludeStackInfo(CurLexer, CurDirLookup,
-                                               CurTokenLexer));
-  CurLexer = 0;
-  CurTokenLexer = 0;
-}
-
-/// ConsumedPeekedToken - Called when Lex() is about to return the PeekedToken
-/// and have it "consumed".
-void Preprocessor::ConsumedPeekedToken() {
-  assert(PeekedToken.getLocation().isValid() && "Confused Peeking?");
-  // Restore CurLexer, TokenLexer.
-  RemoveTopOfLexerStack();
-  // Make PeekedToken invalid.
-  PeekedToken.startToken();
-}
-
 //===----------------------------------------------------------------------===//
 // Methods for Entering and Callbacks for leaving various contexts
 
diff --git a/lib/Lex/Preprocessor.cpp b/lib/Lex/Preprocessor.cpp
index 631b8361e9..33c94b6e8a 100644
--- a/lib/Lex/Preprocessor.cpp
+++ b/lib/Lex/Preprocessor.cpp
@@ -68,6 +68,9 @@ Preprocessor::Preprocessor(Diagnostic &diags, const LangOptions &opts,
   InMacroArgs = false;
   NumCachedTokenLexers = 0;
 
+  CacheTokens = false;
+  CachedLexPos = 0;
+
   // "Poison" __VA_ARGS__, which can only appear in the expansion of a macro.
   // This gets unpoisoned where it is allowed.
   (Ident__VA_ARGS__ = getIdentifierInfo("__VA_ARGS__"))->setIsPoisoned();
@@ -579,4 +582,3 @@ void Preprocessor::HandleIdentifier(Token &Identifier) {
   if (II.isExtensionToken() && Features.C99)
     Diag(Identifier, diag::ext_token_used);
 }
-
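
A minimal usage sketch of the backtracking API introduced above (not part of the commit): it assumes a fully constructed clang::Preprocessor named PP, and the commit-or-rewind decision below is only a stand-in for whatever tentative-parse check a real client would perform. It uses only methods that exist after this patch: LookAhead(), EnableBacktrackAtThisPos(), Lex(), DisableBacktrack(), Backtrack() and isBacktrackEnabled().

#include <cassert>
#include "clang/Lex/Preprocessor.h"
using namespace clang;

// Tentatively consume two tokens; if the speculative check fails, rewind so
// the same tokens will be handed out by PP.Lex() again.
bool TryConsumeTwoTokens(Preprocessor &PP) {
  // Peeking never consumes anything: with this commit the peeked tokens
  // simply land in the Preprocessor's CachedTokens vector.
  if (PP.LookAhead(1).is(tok::eof))
    return false;

  PP.EnableBacktrackAtThisPos();   // start recording lexed tokens here

  Token Tok1, Tok2;
  PP.Lex(Tok1);                    // consumed tokens go into the cache
  PP.Lex(Tok2);

  // Stand-in predicate; a real caller would inspect the tokens it parsed.
  bool Commit = Tok1.isNot(tok::eof) && Tok2.isNot(tok::eof);

  if (Commit)
    PP.DisableBacktrack();         // keep the tokens we consumed
  else
    PP.Backtrack();                // re-lex Tok1 and Tok2 on the next Lex()

  assert(!PP.isBacktrackEnabled() && "caching must always be closed out");
  return Commit;
}

As the NOTE in Preprocessor.h stresses, every EnableBacktrackAtThisPos() must eventually be paired with either DisableBacktrack() or Backtrack(); otherwise caching of tokens continues indefinitely.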