implement simple support for arbitrary token lookahead. Change the

author Chris Lattner <sabre@nondot.org>

Mon, 10 Mar 2008 06:06:04 +0000 (06:06 +0000)

committer Chris Lattner <sabre@nondot.org>

Mon, 10 Mar 2008 06:06:04 +0000 (06:06 +0000)
author Chris Lattner <sabre@nondot.org>
Mon, 10 Mar 2008 06:06:04 +0000 (06:06 +0000)
committer Chris Lattner <sabre@nondot.org>
Mon, 10 Mar 2008 06:06:04 +0000 (06:06 +0000)
diff --git a/Lex/MacroArgs.cpp b/Lex/MacroArgs.cpp

index bebf5cd2d8f9d2e161278c203e8cf55ee040db84..a26e50eb762fe849a2a15484ddf68954ba139a2c 100644 (file)
--- a/Lex/MacroArgs.cpp
+++ b/Lex/MacroArgs.cpp
@@ -112,7 +112,8 @@ MacroArgs::getPreExpArgument(unsigned Arg, Preprocessor &PP) {
    // this, we set up a fake TokenLexer to lex from the unexpanded argument
    // list.  With this installed, we lex expanded tokens until we hit the EOF
    // token at the end of the unexp list.
-  PP.EnterTokenStream(AT, NumToks);
+  PP.EnterTokenStream(AT, NumToks, false /*disable expand*/, 
+                      false /*owns tokens*/);
  
    // Lex all of the macro-expanded tokens into Result.
    do {
diff --git a/Lex/PPLexerChange.cpp b/Lex/PPLexerChange.cpp

index c5998c93ef91c7d3add5d466d0da5c28304ccadf..95028501f7cb04e7ed9fabeac09161ca296b631b 100644 (file)
--- a/Lex/PPLexerChange.cpp
+++ b/Lex/PPLexerChange.cpp
@@ -25,7 +25,7 @@ PPCallbacks::~PPCallbacks() {
  
  
  //===----------------------------------------------------------------------===//
-// Source File Location Methods.
+// Miscellaneous Methods.
  //===----------------------------------------------------------------------===//
  
  /// isInPrimaryFile - Return true if we're in the top-level file, not in a
@@ -60,6 +60,54 @@ Lexer *Preprocessor::getCurrentFileLexer() const {
    return 0;
  }
  
+/// LookAhead - This peeks ahead N tokens and returns that token without
+/// consuming any tokens.  LookAhead(0) returns the next token that would be
+/// returned by Lex(), LookAhead(1) returns the token after it, etc.
+///
+/// NOTE: this is a relatively expensive method, so it should not be used in
+/// common code paths if possible!
+///
+Token Preprocessor::LookAhead(unsigned N) {
+  // The loop below stores up to N+1 tokens (indices 0..N); allocate N+1.
+  Token *LookaheadTokens = new Token[N+1];
+
+  // Read N+1 tokens into LookaheadTokens.  Afterwards, Tok is what we return.
+  Token Tok;
+  unsigned NumTokens = 0;
+  for (; N != ~0U; --N, ++NumTokens) {
+    Lex(Tok);
+    LookaheadTokens[NumTokens] = Tok;
+    
+    // If we got to EOF, don't lex past it.  This will cause LookAhead to return
+    // the EOF token.
+    if (Tok.is(tok::eof))
+      break;
+  }
+
+  // Okay, at this point, we have the token we want to return in Tok.  However,
+  // we read it and a bunch of other stuff (in LookaheadTokens) that we must
+  // allow subsequent calls to 'Lex' to return.  To do this, we push a new token
+  // lexer onto the lexer stack with the tokens we read here.  This passes
+  // ownership of LookaheadTokens to EnterTokenStream.
+  //
+  // Note that we disable macro expansion of the tokens from this buffer, since
+  // any macros have already been expanded, and the internal preprocessor state
+  // may already read past new macros.  Consider something like LookAhead(1) on
+  //      X
+  //      #define X 14
+  //      Y
+  // The lookahead call should return 'Y', and the next Lex call should return
+  // 'X' even though X -> 14 has already been entered as a macro.
+  //
+  EnterTokenStream(LookaheadTokens, NumTokens, true /*DisableExpansion*/,
+                   true /*OwnsTokens*/);
+  return Tok;
+}
+
+
+//===----------------------------------------------------------------------===//
+// Methods for Entering and Callbacks for leaving various contexts
+//===----------------------------------------------------------------------===//
  
  /// EnterSourceFile - Add a source file to the top of the include stack and
  /// start lexing tokens from it instead of the current buffer.  Return true
@@ -123,11 +171,20 @@ void Preprocessor::EnterMacro(Token &Tok, MacroArgs *Args) {
  }
  
  /// EnterTokenStream - Add a "macro" context to the top of the include stack,
-/// which will cause the lexer to start returning the specified tokens.  Note
-/// that these tokens will be re-macro-expanded when/if expansion is enabled.
-/// This method assumes that the specified stream of tokens has a permanent
-/// owner somewhere, so they do not need to be copied.
-void Preprocessor::EnterTokenStream(const Token *Toks, unsigned NumToks) {
+/// which will cause the lexer to start returning the specified tokens.
+///
+/// If DisableMacroExpansion is true, tokens lexed from the token stream will
+/// not be subject to further macro expansion.  Otherwise, these tokens will
+/// be re-macro-expanded when/if expansion is enabled.
+///
+/// If OwnsTokens is false, this method assumes that the specified stream of
+/// tokens has a permanent owner somewhere, so they do not need to be copied.
+/// If it is true, it assumes the array of tokens is allocated with new[] and
+/// must be freed.
+///
+void Preprocessor::EnterTokenStream(const Token *Toks, unsigned NumToks,
+                                    bool DisableMacroExpansion,
+                                    bool OwnsTokens) {
    // Save our current state.
    IncludeMacroStack.push_back(IncludeStackInfo(CurLexer, CurDirLookup,
                                                 CurTokenLexer));
@@ -136,10 +193,11 @@ void Preprocessor::EnterTokenStream(const Token *Toks, unsigned NumToks) {
  
    // Create a macro expander to expand from the specified token stream.
    if (NumCachedTokenLexers == 0) {
-    CurTokenLexer = new TokenLexer(Toks, NumToks, *this);
+    CurTokenLexer = new TokenLexer(Toks, NumToks, DisableMacroExpansion,
+                                   OwnsTokens, *this);
    } else {
      CurTokenLexer = TokenLexerCache[--NumCachedTokenLexers];
-    CurTokenLexer->Init(Toks, NumToks);
+    CurTokenLexer->Init(Toks, NumToks, DisableMacroExpansion, OwnsTokens);
    }
  }
  
diff --git a/Lex/TokenLexer.cpp b/Lex/TokenLexer.cpp

index c91b753b6a6c56f2fef33c59b623577f07bc9888..fc8cfd715c48df02df32434c3bddd9719b4979bd 100644 (file)
--- a/Lex/TokenLexer.cpp
+++ b/Lex/TokenLexer.cpp
@@ -36,6 +36,7 @@ void TokenLexer::Init(Token &Tok, MacroArgs *Actuals) {
    HasLeadingSpace = Tok.hasLeadingSpace();
    Tokens = &*Macro->tokens_begin();
    OwnsTokens = false;
+  DisableMacroExpansion = false;
    NumTokens = Macro->tokens_end()-Macro->tokens_begin();
  
    // If this is a function-like macro, expand the arguments and change
@@ -53,7 +54,8 @@ void TokenLexer::Init(Token &Tok, MacroArgs *Actuals) {
  
  /// Create a TokenLexer for the specified token stream.  This does not
  /// take ownership of the specified token vector.
-void TokenLexer::Init(const Token *TokArray, unsigned NumToks) {
+void TokenLexer::Init(const Token *TokArray, unsigned NumToks,
+                      bool disableMacroExpansion, bool ownsTokens) {
    // If the client is reusing a TokenLexer, make sure to free any memory
    // associated with it.
    destroy();
@@ -61,7 +63,8 @@ void TokenLexer::Init(const Token *TokArray, unsigned NumToks) {
    Macro = 0;
    ActualArgs = 0;
    Tokens = TokArray;
-  OwnsTokens = false;
+  OwnsTokens = ownsTokens;
+  DisableMacroExpansion = disableMacroExpansion;
    NumTokens = NumToks;
    CurToken = 0;
    InstantiateLoc = SourceLocation();
@@ -323,7 +326,7 @@ void TokenLexer::Lex(Token &Tok) {
    }
    
    // Handle recursive expansion!
-  if (Tok.getIdentifierInfo())
+  if (Tok.getIdentifierInfo() && !DisableMacroExpansion)
      return PP.HandleIdentifier(Tok);
  
    // Otherwise, return a normal token.
diff --git a/Parse/ParseObjc.cpp b/Parse/ParseObjc.cpp

index 816d2bf6a3d3d033fa3fe6e63422a0cb914616d6..7f33ffd4c8a22e0cf5c36cedc3f9a9a40b28b0e5 100644 (file)
--- a/Parse/ParseObjc.cpp
+++ b/Parse/ParseObjc.cpp
@@ -1150,10 +1150,8 @@ Parser::StmtResult Parser::ParseObjCSynchronizedStmt(SourceLocation atLoc) {
  ///     parameter-declaration
  ///     '...' [OBJC2]
  ///
-Parser::StmtResult Parser::ParseObjCTryStmt(SourceLocation atLoc, 
-                                            bool &processAtKeyword) {
+Parser::StmtResult Parser::ParseObjCTryStmt(SourceLocation atLoc) {
    bool catch_or_finally_seen = false;
-  processAtKeyword = false;
    
    ConsumeToken(); // consume try
    if (Tok.isNot(tok::l_brace)) {
@@ -1165,7 +1163,16 @@ Parser::StmtResult Parser::ParseObjCTryStmt(SourceLocation atLoc,
    StmtResult TryBody = ParseCompoundStatementBody();
    if (TryBody.isInvalid)
      TryBody = Actions.ActOnNullStmt(Tok.getLocation());
+  
    while (Tok.is(tok::at)) {
+    // At this point, we need to lookahead to determine if this @ is the start
+    // of an @catch or @finally.  We don't want to consume the @ token if this
+    // is an @try or @encode or something else.
+    Token AfterAt = GetLookAheadToken(1);
+    if (!AfterAt.isObjCAtKeyword(tok::objc_catch) &&
+        !AfterAt.isObjCAtKeyword(tok::objc_finally))
+      break;
+      
      SourceLocation AtCatchFinallyLoc = ConsumeToken();
      if (Tok.isObjCAtKeyword(tok::objc_catch)) {
        StmtTy *FirstPart = 0;
@@ -1203,7 +1210,8 @@ Parser::StmtResult Parser::ParseObjCTryStmt(SourceLocation atLoc,
          return true;
        }
        catch_or_finally_seen = true;
-    } else if (Tok.isObjCAtKeyword(tok::objc_finally)) {
+    } else {
+      assert(Tok.isObjCAtKeyword(tok::objc_finally) && "Lookahead confused?");
        ConsumeToken(); // consume finally
        
        StmtResult FinallyBody(true);
@@ -1217,9 +1225,6 @@ Parser::StmtResult Parser::ParseObjCTryStmt(SourceLocation atLoc,
                                                     FinallyBody.Val);
        catch_or_finally_seen = true;
        break;
-    } else {
-      processAtKeyword = true;
-      break;
      }
    }
    if (!catch_or_finally_seen) {
@@ -1274,18 +1279,7 @@ Parser::DeclTy *Parser::ParseObjCMethodDefinition() {
  
  Parser::StmtResult Parser::ParseObjCAtStatement(SourceLocation AtLoc) {
    if (Tok.isObjCAtKeyword(tok::objc_try)) {
-    bool parsedAtSign;
-    
-    StmtResult Res = ParseObjCTryStmt(AtLoc, parsedAtSign);
-    // FIXME: This hack results in a dropped AST node. To correctly implement 
-    // the hack, parseAtSign would need to bubble up to 
-    // ParseCompoundStatement(). This would involve adding an argument to this 
-    // routine and ParseStatementOrDeclaration(). Changing the parser in this
-    // fashion to solve such a conceptually simple problem is undesirable.
-    // Rework this clause once 2-token lookahead is implemented.
-    if (!Res.isInvalid && parsedAtSign)
-      return ParseObjCAtStatement(AtLoc);
-    return Res;
+    return ParseObjCTryStmt(AtLoc);
    } else if (Tok.isObjCAtKeyword(tok::objc_throw))
      return ParseObjCThrowStmt(AtLoc);
    else if (Tok.isObjCAtKeyword(tok::objc_synchronized))
diff --git a/include/clang/Lex/Preprocessor.h b/include/clang/Lex/Preprocessor.h

index fd12946a461ae9fdd3b4662d179751305caf8fb2..717f865b60ba9334ba49dad5beef26561675cfdb 100644 (file)
--- a/include/clang/Lex/Preprocessor.h
+++ b/include/clang/Lex/Preprocessor.h
@@ -230,11 +230,19 @@ public:
    void EnterMacro(Token &Identifier, MacroArgs *Args);
    
    /// EnterTokenStream - Add a "macro" context to the top of the include stack,
-  /// which will cause the lexer to start returning the specified tokens.  Note
-  /// that these tokens will be re-macro-expanded when/if expansion is enabled.
-  /// This method assumes that the specified stream of tokens has a permanent
-  /// owner somewhere, so they do not need to be copied.
-  void EnterTokenStream(const Token *Toks, unsigned NumToks);
+  /// which will cause the lexer to start returning the specified tokens.
+  ///
+  /// If DisableMacroExpansion is true, tokens lexed from the token stream will
+  /// not be subject to further macro expansion.  Otherwise, these tokens will
+  /// be re-macro-expanded when/if expansion is enabled.
+  ///
+  /// If OwnsTokens is false, this method assumes that the specified stream of
+  /// tokens has a permanent owner somewhere, so they do not need to be copied.
+  /// If it is true, it assumes the array of tokens is allocated with new[] and
+  /// must be freed.
+  ///
+  void EnterTokenStream(const Token *Toks, unsigned NumToks,
+                        bool DisableMacroExpansion, bool OwnsTokens);
    
    /// RemoveTopOfLexerStack - Pop the current lexer/macro exp off the top of the
    /// lexer stack.  This should only be used in situations where the current
@@ -272,6 +280,17 @@ public:
      DisableMacroExpansion = OldVal;
    }
    
+  /// LookAhead - This peeks ahead N tokens and returns that token without
+  /// consuming any tokens.  LookAhead(0) returns the next token that would be
+  /// returned by Lex(), LookAhead(1) returns the token after it, etc.  This
+  /// returns normal tokens after phase 5.  As such, it is equivalent to using
+  /// 'Lex', not 'LexUnexpandedToken'.
+  ///
+  /// NOTE: this is a relatively expensive method, so it should not be used in
+  /// common code paths if possible!
+  ///
+  Token LookAhead(unsigned N);
+  
    /// Diag - Forwarding function for diagnostics.  This emits a diagnostic at
    /// the specified Token's location, translating the token's start
    /// position in the current buffer into a SourcePosition object for rendering.
diff --git a/include/clang/Lex/TokenLexer.h b/include/clang/Lex/TokenLexer.h

index 0ecb4ec45d3f9c66ac6f44532df188be20364323..9e384347d1b5fb43ffcc29bb8ac73c1b7f79bb7c 100644 (file)
--- a/include/clang/Lex/TokenLexer.h
+++ b/include/clang/Lex/TokenLexer.h
@@ -68,6 +68,10 @@ class TokenLexer {
    /// definition, we don't make a copy of it.
    bool OwnsTokens : 1;
    
+  /// DisableMacroExpansion - This is true when tokens lexed from the TokenLexer
+  /// should not be subject to further macro expansion.
+  bool DisableMacroExpansion : 1;
+  
    TokenLexer(const TokenLexer&);  // DO NOT IMPLEMENT
    void operator=(const TokenLexer&); // DO NOT IMPLEMENT
  public:
@@ -85,14 +89,19 @@ public:
    
    /// Create a TokenLexer for the specified token stream.  This does not
    /// take ownership of the specified token vector.
-  TokenLexer(const Token *TokArray, unsigned NumToks, Preprocessor &pp)
+  TokenLexer(const Token *TokArray, unsigned NumToks, bool DisableExpansion,
+             bool OwnsTokens, Preprocessor &pp)
      : Macro(0), ActualArgs(0), PP(pp), OwnsTokens(false) {
-    Init(TokArray, NumToks);
+    Init(TokArray, NumToks, DisableExpansion, OwnsTokens);
    }
    
    /// Init - Initialize this TokenLexer with the specified token stream.
    /// This does not take ownership of the specified token vector.
-  void Init(const Token *TokArray, unsigned NumToks);
+  ///
+  /// DisableMacroExpansion is true when macro expansion of tokens lexed from
+  /// this stream should be disabled.
+  void Init(const Token *TokArray, unsigned NumToks,
+            bool DisableMacroExpansion, bool OwnsTokens);
    
    ~TokenLexer() { destroy(); }
    
diff --git a/include/clang/Parse/Parser.h b/include/clang/Parse/Parser.h

index 860e5d700fb62d15ce9ba53db4fd47b61afb0c77..962b3370b727b6447623342bc11e65063599e794 100644 (file)
--- a/include/clang/Parse/Parser.h
+++ b/include/clang/Parse/Parser.h
@@ -189,6 +189,22 @@ private:
      return L;
    }
    
+  /// GetLookAheadToken - This peeks ahead N tokens and returns that token
+  /// without consuming any tokens.  GetLookAheadToken(0) returns 'Tok',
+  /// GetLookAheadToken(1) returns the token after Tok, etc.
+  ///
+  /// Note that this differs from the Preprocessor's LookAhead method, because
+  /// the Parser always has one token lexed that the preprocessor doesn't.
+  ///
+  /// NOTE: this is a relatively expensive method, so it should not be used in
+  /// common code paths if possible!
+  ///
+  Token GetLookAheadToken(unsigned N) {
+    if (N == 0 || Tok.is(tok::eof)) return Tok;
+    return PP.LookAhead(N-1);
+  }
+  
+  
    /// MatchRHSPunctuation - For punctuation with a LHS and RHS (e.g. '['/']'),
    /// this helper function matches and consumes the specified RHS token if
    /// present.  If not present, it emits the specified diagnostic indicating
@@ -411,7 +427,7 @@ private:
    StmtResult ParseAsmStatement(bool &msAsm);
    StmtResult FuzzyParseMicrosoftAsmStatement();
    StmtResult ParseObjCAtStatement(SourceLocation atLoc);
-  StmtResult ParseObjCTryStmt(SourceLocation atLoc, bool &processAtKeyword);
+  StmtResult ParseObjCTryStmt(SourceLocation atLoc);
    StmtResult ParseObjCThrowStmt(SourceLocation atLoc);
    StmtResult ParseObjCSynchronizedStmt(SourceLocation atLoc);
    bool ParseAsmOperandsOpt(llvm::SmallVectorImpl<std::string> &Names,
diff --git a/test/Parser/objc-try-catch-1.m b/test/Parser/objc-try-catch-1.m

index 95ca5a18f4207d37c705037fc33923f9347f661f..ef4b94348d0d21f86d8abaaf06fa304a665e39a1 100644 (file)
--- a/test/Parser/objc-try-catch-1.m
+++ b/test/Parser/objc-try-catch-1.m
@@ -2,6 +2,9 @@
  
  void * proc();
  
+@interface NSConstantString
+@end
+
  @interface Frob
  @end
  
@@ -40,13 +43,13 @@ void * foo()
  void bar()
  {
    @try {}// expected-error {{@try statment without a @catch and @finally clause}}
-  @"s" {} //  expected-warning {{result unused}} expected-error {{expected ';'}}
+  @"s"; //  expected-warning {{result unused}}
  }
  
  void baz()
  {
    @try {}// expected-error {{@try statment without a @catch and @finally clause}}
-  @try {}// expected-error {{undeclared identifier}}
+  @try {}
    @finally {}
  }
author	Chris Lattner <sabre@nondot.org>
	Mon, 10 Mar 2008 06:06:04 +0000 (06:06 +0000)
committer	Chris Lattner <sabre@nondot.org>
	Mon, 10 Mar 2008 06:06:04 +0000 (06:06 +0000)
Lex/MacroArgs.cpp		patch \| blob \| history
Lex/PPLexerChange.cpp		patch \| blob \| history
Lex/TokenLexer.cpp		patch \| blob \| history
Parse/ParseObjc.cpp		patch \| blob \| history
include/clang/Lex/Preprocessor.h		patch \| blob \| history
include/clang/Lex/TokenLexer.h		patch \| blob \| history
include/clang/Parse/Parser.h		patch \| blob \| history
test/Parser/objc-try-catch-1.m		patch \| blob \| history