From 6b884508c3bc97cc9df9516adb92fbf88dd0a2e4 Mon Sep 17 00:00:00 2001
From: Chris Lattner <sabre@nondot.org>
Date: Mon, 10 Mar 2008 06:06:04 +0000
Subject: [PATCH] implement simple support for arbitrary token lookahead. 
 Change the objc @try parser to use it, fixing a FIXME.  Update the
 objc-try-catch-1.m file to pass now that we get more reasonable errors.

git-svn-id: https://llvm.org/svn/llvm-project/cfe/trunk@48129 91177308-0d34-0410-b5e6-96231b3b80d8
---
 Lex/MacroArgs.cpp                |  3 +-
 Lex/PPLexerChange.cpp            | 74 ++++++++++++++++++++++++++++----
 Lex/TokenLexer.cpp               |  9 ++--
 Parse/ParseObjc.cpp              | 32 ++++++--------
 include/clang/Lex/Preprocessor.h | 29 ++++++++++---
 include/clang/Lex/TokenLexer.h   | 15 +++++--
 include/clang/Parse/Parser.h     | 18 +++++++-
 test/Parser/objc-try-catch-1.m   |  7 ++-
 8 files changed, 145 insertions(+), 42 deletions(-)

diff --git a/Lex/MacroArgs.cpp b/Lex/MacroArgs.cpp
index bebf5cd2d8..a26e50eb76 100644
--- a/Lex/MacroArgs.cpp
+++ b/Lex/MacroArgs.cpp
@@ -112,7 +112,8 @@ MacroArgs::getPreExpArgument(unsigned Arg, Preprocessor &PP) {
   // this, we set up a fake TokenLexer to lex from the unexpanded argument
   // list.  With this installed, we lex expanded tokens until we hit the EOF
   // token at the end of the unexp list.
-  PP.EnterTokenStream(AT, NumToks);
+  PP.EnterTokenStream(AT, NumToks, false /*disable expand*/, 
+                      false /*owns tokens*/);
 
   // Lex all of the macro-expanded tokens into Result.
   do {
diff --git a/Lex/PPLexerChange.cpp b/Lex/PPLexerChange.cpp
index c5998c93ef..95028501f7 100644
--- a/Lex/PPLexerChange.cpp
+++ b/Lex/PPLexerChange.cpp
@@ -25,7 +25,7 @@ PPCallbacks::~PPCallbacks() {
 
 
 //===----------------------------------------------------------------------===//
-// Source File Location Methods.
+// Miscellaneous Methods.
 //===----------------------------------------------------------------------===//
 
 /// isInPrimaryFile - Return true if we're in the top-level file, not in a
@@ -60,6 +60,54 @@ Lexer *Preprocessor::getCurrentFileLexer() const {
   return 0;
 }
 
+/// LookAhead - This peeks ahead N tokens and returns that token without
+/// consuming any tokens.  LookAhead(0) returns the next token that would be
+/// returned by Lex(), LookAhead(1) returns the token after it, etc.
+///
+/// NOTE: this is a relatively expensive method, so it should not be used in
+/// common code paths if possible!
+///
+Token Preprocessor::LookAhead(unsigned N) {
+  // The loop below stores N+1 tokens (indices 0..N), so allocate N+1 slots.
+  Token *LookaheadTokens = new Token[N+1];
+
+  // Read N+1 tokens into LookaheadTokens.  After this loop, Tok is the token
+  // to return.
+  Token Tok;
+  unsigned NumTokens = 0;
+  for (; N != ~0U; --N, ++NumTokens) {
+    Lex(Tok);
+    LookaheadTokens[NumTokens] = Tok;
+
+    // If we got to EOF, don't lex past it.  This will cause LookAhead to return
+    // the EOF token.  (The break skips ++NumTokens, so EOF is not re-queued.)
+    if (Tok.is(tok::eof))
+      break;
+  }
+
+  // Okay, at this point, we have the token we want to return in Tok.  However,
+  // we read it and a bunch of other stuff (in LookaheadTokens) that we must
+  // allow subsequent calls to 'Lex' to return.  To do this, we push a new token
+  // lexer onto the lexer stack with the tokens we read here.  This passes
+  // ownership of LookaheadTokens to EnterTokenStream.
+  //
+  // Note that we disable macro expansion of the tokens from this buffer, since
+  // any macros have already been expanded, and the internal preprocessor state
+  // may have already read past new macros.  Consider LookAhead(1) on
+  //      X
+  //      #define X 14
+  //      Y
+  // The lookahead call should return 'Y', and the next Lex call should return
+  // 'X' even though X -> 14 has already been entered as a macro.
+  EnterTokenStream(LookaheadTokens, NumTokens, true /*DisableExpansion*/,
+                   true /*OwnsTokens*/);
+  return Tok;
+}
+
+
+//===----------------------------------------------------------------------===//
+// Methods for Entering and Callbacks for leaving various contexts
+//===----------------------------------------------------------------------===//
 
 /// EnterSourceFile - Add a source file to the top of the include stack and
 /// start lexing tokens from it instead of the current buffer.  Return true
@@ -123,11 +171,20 @@ void Preprocessor::EnterMacro(Token &Tok, MacroArgs *Args) {
 }
 
 /// EnterTokenStream - Add a "macro" context to the top of the include stack,
-/// which will cause the lexer to start returning the specified tokens.  Note
-/// that these tokens will be re-macro-expanded when/if expansion is enabled.
-/// This method assumes that the specified stream of tokens has a permanent
-/// owner somewhere, so they do not need to be copied.
-void Preprocessor::EnterTokenStream(const Token *Toks, unsigned NumToks) {
+/// which will cause the lexer to start returning the specified tokens.
+///
+/// If DisableMacroExpansion is true, tokens lexed from the token stream will
+/// not be subject to further macro expansion.  Otherwise, these tokens will
+/// be re-macro-expanded when/if expansion is enabled.
+///
+/// If OwnsTokens is false, this method assumes that the specified stream of
+/// tokens has a permanent owner somewhere, so they do not need to be copied.
+/// If it is true, it assumes the array of tokens is allocated with new[] and
+/// must be freed.
+///
+void Preprocessor::EnterTokenStream(const Token *Toks, unsigned NumToks,
+                                    bool DisableMacroExpansion,
+                                    bool OwnsTokens) {
   // Save our current state.
   IncludeMacroStack.push_back(IncludeStackInfo(CurLexer, CurDirLookup,
                                                CurTokenLexer));
@@ -136,10 +193,11 @@ void Preprocessor::EnterTokenStream(const Token *Toks, unsigned NumToks) {
 
   // Create a macro expander to expand from the specified token stream.
   if (NumCachedTokenLexers == 0) {
-    CurTokenLexer = new TokenLexer(Toks, NumToks, *this);
+    CurTokenLexer = new TokenLexer(Toks, NumToks, DisableMacroExpansion,
+                                   OwnsTokens, *this);
   } else {
     CurTokenLexer = TokenLexerCache[--NumCachedTokenLexers];
-    CurTokenLexer->Init(Toks, NumToks);
+    CurTokenLexer->Init(Toks, NumToks, DisableMacroExpansion, OwnsTokens);
   }
 }
 
diff --git a/Lex/TokenLexer.cpp b/Lex/TokenLexer.cpp
index c91b753b6a..fc8cfd715c 100644
--- a/Lex/TokenLexer.cpp
+++ b/Lex/TokenLexer.cpp
@@ -36,6 +36,7 @@ void TokenLexer::Init(Token &Tok, MacroArgs *Actuals) {
   HasLeadingSpace = Tok.hasLeadingSpace();
   Tokens = &*Macro->tokens_begin();
   OwnsTokens = false;
+  DisableMacroExpansion = false;
   NumTokens = Macro->tokens_end()-Macro->tokens_begin();
 
   // If this is a function-like macro, expand the arguments and change
@@ -53,7 +54,8 @@ void TokenLexer::Init(Token &Tok, MacroArgs *Actuals) {
 
 /// Create a TokenLexer for the specified token stream.  This does not
 /// take ownership of the specified token vector.
-void TokenLexer::Init(const Token *TokArray, unsigned NumToks) {
+void TokenLexer::Init(const Token *TokArray, unsigned NumToks,
+                      bool disableMacroExpansion, bool ownsTokens) {
   // If the client is reusing a TokenLexer, make sure to free any memory
   // associated with it.
   destroy();
@@ -61,7 +63,8 @@ void TokenLexer::Init(const Token *TokArray, unsigned NumToks) {
   Macro = 0;
   ActualArgs = 0;
   Tokens = TokArray;
-  OwnsTokens = false;
+  OwnsTokens = ownsTokens;
+  DisableMacroExpansion = disableMacroExpansion;
   NumTokens = NumToks;
   CurToken = 0;
   InstantiateLoc = SourceLocation();
@@ -323,7 +326,7 @@ void TokenLexer::Lex(Token &Tok) {
   }
   
   // Handle recursive expansion!
-  if (Tok.getIdentifierInfo())
+  if (Tok.getIdentifierInfo() && !DisableMacroExpansion)
     return PP.HandleIdentifier(Tok);
 
   // Otherwise, return a normal token.
diff --git a/Parse/ParseObjc.cpp b/Parse/ParseObjc.cpp
index 816d2bf6a3..7f33ffd4c8 100644
--- a/Parse/ParseObjc.cpp
+++ b/Parse/ParseObjc.cpp
@@ -1150,10 +1150,8 @@ Parser::StmtResult Parser::ParseObjCSynchronizedStmt(SourceLocation atLoc) {
 ///     parameter-declaration
 ///     '...' [OBJC2]
 ///
-Parser::StmtResult Parser::ParseObjCTryStmt(SourceLocation atLoc, 
-                                            bool &processAtKeyword) {
+Parser::StmtResult Parser::ParseObjCTryStmt(SourceLocation atLoc) {
   bool catch_or_finally_seen = false;
-  processAtKeyword = false;
   
   ConsumeToken(); // consume try
   if (Tok.isNot(tok::l_brace)) {
@@ -1165,7 +1163,16 @@ Parser::StmtResult Parser::ParseObjCTryStmt(SourceLocation atLoc,
   StmtResult TryBody = ParseCompoundStatementBody();
   if (TryBody.isInvalid)
     TryBody = Actions.ActOnNullStmt(Tok.getLocation());
+  
   while (Tok.is(tok::at)) {
+    // At this point, we need to lookahead to determine if this @ is the start
+    // of an @catch or @finally.  We don't want to consume the @ token if this
+    // is an @try or @encode or something else.
+    Token AfterAt = GetLookAheadToken(1);
+    if (!AfterAt.isObjCAtKeyword(tok::objc_catch) &&
+        !AfterAt.isObjCAtKeyword(tok::objc_finally))
+      break;
+      
     SourceLocation AtCatchFinallyLoc = ConsumeToken();
     if (Tok.isObjCAtKeyword(tok::objc_catch)) {
       StmtTy *FirstPart = 0;
@@ -1203,7 +1210,8 @@ Parser::StmtResult Parser::ParseObjCTryStmt(SourceLocation atLoc,
         return true;
       }
       catch_or_finally_seen = true;
-    } else if (Tok.isObjCAtKeyword(tok::objc_finally)) {
+    } else {
+      assert(Tok.isObjCAtKeyword(tok::objc_finally) && "Lookahead confused?");
       ConsumeToken(); // consume finally
       
       StmtResult FinallyBody(true);
@@ -1217,9 +1225,6 @@ Parser::StmtResult Parser::ParseObjCTryStmt(SourceLocation atLoc,
                                                    FinallyBody.Val);
       catch_or_finally_seen = true;
       break;
-    } else {
-      processAtKeyword = true;
-      break;
     }
   }
   if (!catch_or_finally_seen) {
@@ -1274,18 +1279,7 @@ Parser::DeclTy *Parser::ParseObjCMethodDefinition() {
 
 Parser::StmtResult Parser::ParseObjCAtStatement(SourceLocation AtLoc) {
   if (Tok.isObjCAtKeyword(tok::objc_try)) {
-    bool parsedAtSign;
-    
-    StmtResult Res = ParseObjCTryStmt(AtLoc, parsedAtSign);
-    // FIXME: This hack results in a dropped AST node. To correctly implement 
-    // the hack, parseAtSign would need to bubble up to 
-    // ParseCompoundStatement(). This would involve adding an argument to this 
-    // routine and ParseStatementOrDeclaration(). Changing the parser in this
-    // fashion to solve such a conceptually simple problem is undesirable.
-    // Rework this clause once 2-token lookahead is implemented.
-    if (!Res.isInvalid && parsedAtSign)
-      return ParseObjCAtStatement(AtLoc);
-    return Res;
+    return ParseObjCTryStmt(AtLoc);
   } else if (Tok.isObjCAtKeyword(tok::objc_throw))
     return ParseObjCThrowStmt(AtLoc);
   else if (Tok.isObjCAtKeyword(tok::objc_synchronized))
diff --git a/include/clang/Lex/Preprocessor.h b/include/clang/Lex/Preprocessor.h
index fd12946a46..717f865b60 100644
--- a/include/clang/Lex/Preprocessor.h
+++ b/include/clang/Lex/Preprocessor.h
@@ -230,11 +230,19 @@ public:
   void EnterMacro(Token &Identifier, MacroArgs *Args);
   
   /// EnterTokenStream - Add a "macro" context to the top of the include stack,
-  /// which will cause the lexer to start returning the specified tokens.  Note
-  /// that these tokens will be re-macro-expanded when/if expansion is enabled.
-  /// This method assumes that the specified stream of tokens has a permanent
-  /// owner somewhere, so they do not need to be copied.
-  void EnterTokenStream(const Token *Toks, unsigned NumToks);
+  /// which will cause the lexer to start returning the specified tokens.
+  ///
+  /// If DisableMacroExpansion is true, tokens lexed from the token stream will
+  /// not be subject to further macro expansion.  Otherwise, these tokens will
+  /// be re-macro-expanded when/if expansion is enabled.
+  ///
+  /// If OwnsTokens is false, this method assumes that the specified stream of
+  /// tokens has a permanent owner somewhere, so they do not need to be copied.
+  /// If it is true, it assumes the array of tokens is allocated with new[] and
+  /// must be freed.
+  ///
+  void EnterTokenStream(const Token *Toks, unsigned NumToks,
+                        bool DisableMacroExpansion, bool OwnsTokens);
   
   /// RemoveTopOfLexerStack - Pop the current lexer/macro exp off the top of the
   /// lexer stack.  This should only be used in situations where the current
@@ -272,6 +280,17 @@ public:
     DisableMacroExpansion = OldVal;
   }
   
+  /// LookAhead - This peeks ahead N tokens and returns that token without
+  /// consuming any tokens.  LookAhead(0) returns the next token that would be
+  /// returned by Lex(), LookAhead(1) returns the token after it, etc.  This
+  /// returns normal tokens after phase 5.  As such, it is equivalent to using
+  /// 'Lex', not 'LexUnexpandedToken'.
+  ///
+  /// NOTE: this is a relatively expensive method, so it should not be used in
+  /// common code paths if possible!
+  ///
+  Token LookAhead(unsigned N);
+  
   /// Diag - Forwarding function for diagnostics.  This emits a diagnostic at
   /// the specified Token's location, translating the token's start
   /// position in the current buffer into a SourcePosition object for rendering.
diff --git a/include/clang/Lex/TokenLexer.h b/include/clang/Lex/TokenLexer.h
index 0ecb4ec45d..9e384347d1 100644
--- a/include/clang/Lex/TokenLexer.h
+++ b/include/clang/Lex/TokenLexer.h
@@ -68,6 +68,10 @@ class TokenLexer {
   /// definition, we don't make a copy of it.
   bool OwnsTokens : 1;
   
+  /// DisableMacroExpansion - This is true when tokens lexed from the TokenLexer
+  /// should not be subject to further macro expansion.
+  bool DisableMacroExpansion : 1;
+  
   TokenLexer(const TokenLexer&);  // DO NOT IMPLEMENT
   void operator=(const TokenLexer&); // DO NOT IMPLEMENT
 public:
@@ -85,14 +89,19 @@ public:
   
   /// Create a TokenLexer for the specified token stream.  This does not
   /// take ownership of the specified token vector.
-  TokenLexer(const Token *TokArray, unsigned NumToks, Preprocessor &pp)
+  TokenLexer(const Token *TokArray, unsigned NumToks, bool DisableExpansion,
+             bool OwnsTokens, Preprocessor &pp)
     : Macro(0), ActualArgs(0), PP(pp), OwnsTokens(false) {
-    Init(TokArray, NumToks);
+    Init(TokArray, NumToks, DisableExpansion, OwnsTokens);
   }
   
   /// Init - Initialize this TokenLexer with the specified token stream.
   /// This does not take ownership of the specified token vector.
-  void Init(const Token *TokArray, unsigned NumToks);
+  ///
+  /// DisableMacroExpansion is true when macro expansion of tokens lexed from
+  /// this stream should be disabled.
+  void Init(const Token *TokArray, unsigned NumToks,
+            bool DisableMacroExpansion, bool OwnsTokens);
   
   ~TokenLexer() { destroy(); }
   
diff --git a/include/clang/Parse/Parser.h b/include/clang/Parse/Parser.h
index 860e5d700f..962b3370b7 100644
--- a/include/clang/Parse/Parser.h
+++ b/include/clang/Parse/Parser.h
@@ -189,6 +189,22 @@ private:
     return L;
   }
   
+  /// GetLookAheadToken - This peeks ahead N tokens and returns that token
+  /// without consuming any tokens.  GetLookAheadToken(0) returns 'Tok',
+  /// GetLookAheadToken(1) returns the token after Tok, etc.  If Tok is already
+  /// EOF, Tok is returned for every N.
+  ///
+  /// Note that this differs from the Preprocessor's LookAhead method, because
+  /// the Parser always has one token lexed that the preprocessor doesn't.
+  ///
+  /// NOTE: this is a relatively expensive method, so it should not be used in
+  /// common code paths if possible!
+  Token GetLookAheadToken(unsigned N) {
+    if (N == 0 || Tok.is(tok::eof)) return Tok;
+    return PP.LookAhead(N-1);
+  }
+  
+  
   /// MatchRHSPunctuation - For punctuation with a LHS and RHS (e.g. '['/']'),
   /// this helper function matches and consumes the specified RHS token if
   /// present.  If not present, it emits the specified diagnostic indicating
@@ -411,7 +427,7 @@ private:
   StmtResult ParseAsmStatement(bool &msAsm);
   StmtResult FuzzyParseMicrosoftAsmStatement();
   StmtResult ParseObjCAtStatement(SourceLocation atLoc);
-  StmtResult ParseObjCTryStmt(SourceLocation atLoc, bool &processAtKeyword);
+  StmtResult ParseObjCTryStmt(SourceLocation atLoc);
   StmtResult ParseObjCThrowStmt(SourceLocation atLoc);
   StmtResult ParseObjCSynchronizedStmt(SourceLocation atLoc);
   bool ParseAsmOperandsOpt(llvm::SmallVectorImpl<std::string> &Names,
diff --git a/test/Parser/objc-try-catch-1.m b/test/Parser/objc-try-catch-1.m
index 95ca5a18f4..ef4b94348d 100644
--- a/test/Parser/objc-try-catch-1.m
+++ b/test/Parser/objc-try-catch-1.m
@@ -2,6 +2,9 @@
 
 void * proc();
 
+@interface NSConstantString
+@end
+
 @interface Frob
 @end
 
@@ -40,13 +43,13 @@ void * foo()
 void bar()
 {
   @try {}// expected-error {{@try statment without a @catch and @finally clause}}
-  @"s" {} //  expected-warning {{result unused}} expected-error {{expected ';'}}
+  @"s"; //  expected-warning {{result unused}}
 }
 
 void baz()
 {
   @try {}// expected-error {{@try statment without a @catch and @finally clause}}
-  @try {}// expected-error {{undeclared identifier}}
+  @try {}
   @finally {}
 }
 
-- 
2.50.1