From 6b884508c3bc97cc9df9516adb92fbf88dd0a2e4 Mon Sep 17 00:00:00 2001 From: Chris Lattner Date: Mon, 10 Mar 2008 06:06:04 +0000 Subject: [PATCH] implement simple support for arbitrary token lookahead. Change the objc @try parser to use it, fixing a FIXME. Update the objc-try-catch-1.m file to pass now that we get more reasonable errors. git-svn-id: https://llvm.org/svn/llvm-project/cfe/trunk@48129 91177308-0d34-0410-b5e6-96231b3b80d8 --- Lex/MacroArgs.cpp | 3 +- Lex/PPLexerChange.cpp | 74 ++++++++++++++++++++++++++++---- Lex/TokenLexer.cpp | 9 ++-- Parse/ParseObjc.cpp | 32 ++++++-------- include/clang/Lex/Preprocessor.h | 29 ++++++++++--- include/clang/Lex/TokenLexer.h | 15 +++++-- include/clang/Parse/Parser.h | 18 +++++++- test/Parser/objc-try-catch-1.m | 7 ++- 8 files changed, 145 insertions(+), 42 deletions(-) diff --git a/Lex/MacroArgs.cpp b/Lex/MacroArgs.cpp index bebf5cd2d8..a26e50eb76 100644 --- a/Lex/MacroArgs.cpp +++ b/Lex/MacroArgs.cpp @@ -112,7 +112,8 @@ MacroArgs::getPreExpArgument(unsigned Arg, Preprocessor &PP) { // this, we set up a fake TokenLexer to lex from the unexpanded argument // list. With this installed, we lex expanded tokens until we hit the EOF // token at the end of the unexp list. - PP.EnterTokenStream(AT, NumToks); + PP.EnterTokenStream(AT, NumToks, false /*disable expand*/, + false /*owns tokens*/); // Lex all of the macro-expanded tokens into Result. do { diff --git a/Lex/PPLexerChange.cpp b/Lex/PPLexerChange.cpp index c5998c93ef..95028501f7 100644 --- a/Lex/PPLexerChange.cpp +++ b/Lex/PPLexerChange.cpp @@ -25,7 +25,7 @@ PPCallbacks::~PPCallbacks() { //===----------------------------------------------------------------------===// -// Source File Location Methods. +// Miscellaneous Methods. 
//===----------------------------------------------------------------------===// /// isInPrimaryFile - Return true if we're in the top-level file, not in a @@ -60,6 +60,54 @@ Lexer *Preprocessor::getCurrentFileLexer() const { return 0; } +/// LookAhead - This peeks ahead N tokens and returns that token without +/// consuming any tokens. LookAhead(0) returns the next token that would be +/// returned by Lex(), LookAhead(1) returns the token after it, etc. +/// +/// NOTE: this is a relatively expensive method, so it should not be used in +/// common code paths if possible! +/// +Token Preprocessor::LookAhead(unsigned N) { + Token *LookaheadTokens = new Token[N+1]; + + // Read N+1 tokens into LookaheadTokens (sized N+1 above). After this loop, + // Tok is the token to return. + Token Tok; + unsigned NumTokens = 0; + for (; N != ~0U; --N, ++NumTokens) { + Lex(Tok); + LookaheadTokens[NumTokens] = Tok; + + // If we got to EOF, don't lex past it. This will cause LookAhead to return + // the EOF token. + if (Tok.is(tok::eof)) + break; + } + + // Okay, at this point, we have the token we want to return in Tok. However, + // we read it and a bunch of other stuff (in LookaheadTokens) that we must + // allow subsequent calls to 'Lex' to return. To do this, we push a new token + // lexer onto the lexer stack with the tokens we read here. This passes + // ownership of LookaheadTokens to EnterTokenStream. + // + // Note that we disable macro expansion of the tokens from this buffer, since + // any macros have already been expanded, and the internal preprocessor state + // may already read past new macros. Consider something like LookAhead(1) on + // X + // #define X 14 + // Y + // The lookahead call should return 'Y', and the next Lex call should return + // 'X' even though X -> 14 has already been entered as a macro.
+ // + EnterTokenStream(LookaheadTokens, NumTokens, true /*DisableExpansion*/, + true /*OwnsTokens*/); + return Tok; +} + + +//===----------------------------------------------------------------------===// +// Methods for Entering and Callbacks for leaving various contexts +//===----------------------------------------------------------------------===// /// EnterSourceFile - Add a source file to the top of the include stack and /// start lexing tokens from it instead of the current buffer. Return true @@ -123,11 +171,20 @@ void Preprocessor::EnterMacro(Token &Tok, MacroArgs *Args) { } /// EnterTokenStream - Add a "macro" context to the top of the include stack, -/// which will cause the lexer to start returning the specified tokens. Note -/// that these tokens will be re-macro-expanded when/if expansion is enabled. -/// This method assumes that the specified stream of tokens has a permanent -/// owner somewhere, so they do not need to be copied. -void Preprocessor::EnterTokenStream(const Token *Toks, unsigned NumToks) { +/// which will cause the lexer to start returning the specified tokens. +/// +/// If DisableMacroExpansion is true, tokens lexed from the token stream will +/// not be subject to further macro expansion. Otherwise, these tokens will +/// be re-macro-expanded when/if expansion is enabled. +/// +/// If OwnsTokens is false, this method assumes that the specified stream of +/// tokens has a permanent owner somewhere, so they do not need to be copied. +/// If it is true, it assumes the array of tokens is allocated with new[] and +/// must be freed. +/// +void Preprocessor::EnterTokenStream(const Token *Toks, unsigned NumToks, + bool DisableMacroExpansion, + bool OwnsTokens) { // Save our current state. IncludeMacroStack.push_back(IncludeStackInfo(CurLexer, CurDirLookup, CurTokenLexer)); @@ -136,10 +193,11 @@ void Preprocessor::EnterTokenStream(const Token *Toks, unsigned NumToks) { // Create a macro expander to expand from the specified token stream.
if (NumCachedTokenLexers == 0) { - CurTokenLexer = new TokenLexer(Toks, NumToks, *this); + CurTokenLexer = new TokenLexer(Toks, NumToks, DisableMacroExpansion, + OwnsTokens, *this); } else { CurTokenLexer = TokenLexerCache[--NumCachedTokenLexers]; - CurTokenLexer->Init(Toks, NumToks); + CurTokenLexer->Init(Toks, NumToks, DisableMacroExpansion, OwnsTokens); } } diff --git a/Lex/TokenLexer.cpp b/Lex/TokenLexer.cpp index c91b753b6a..fc8cfd715c 100644 --- a/Lex/TokenLexer.cpp +++ b/Lex/TokenLexer.cpp @@ -36,6 +36,7 @@ void TokenLexer::Init(Token &Tok, MacroArgs *Actuals) { HasLeadingSpace = Tok.hasLeadingSpace(); Tokens = &*Macro->tokens_begin(); OwnsTokens = false; + DisableMacroExpansion = false; NumTokens = Macro->tokens_end()-Macro->tokens_begin(); // If this is a function-like macro, expand the arguments and change @@ -53,7 +54,8 @@ void TokenLexer::Init(Token &Tok, MacroArgs *Actuals) { /// Create a TokenLexer for the specified token stream. This does not /// take ownership of the specified token vector. -void TokenLexer::Init(const Token *TokArray, unsigned NumToks) { +void TokenLexer::Init(const Token *TokArray, unsigned NumToks, + bool disableMacroExpansion, bool ownsTokens) { // If the client is reusing a TokenLexer, make sure to free any memory // associated with it. destroy(); @@ -61,7 +63,8 @@ void TokenLexer::Init(const Token *TokArray, unsigned NumToks) { Macro = 0; ActualArgs = 0; Tokens = TokArray; - OwnsTokens = false; + OwnsTokens = ownsTokens; + DisableMacroExpansion = disableMacroExpansion; NumTokens = NumToks; CurToken = 0; InstantiateLoc = SourceLocation(); @@ -323,7 +326,7 @@ void TokenLexer::Lex(Token &Tok) { } // Handle recursive expansion! - if (Tok.getIdentifierInfo()) + if (Tok.getIdentifierInfo() && !DisableMacroExpansion) return PP.HandleIdentifier(Tok); // Otherwise, return a normal token. 
diff --git a/Parse/ParseObjc.cpp b/Parse/ParseObjc.cpp index 816d2bf6a3..7f33ffd4c8 100644 --- a/Parse/ParseObjc.cpp +++ b/Parse/ParseObjc.cpp @@ -1150,10 +1150,8 @@ Parser::StmtResult Parser::ParseObjCSynchronizedStmt(SourceLocation atLoc) { /// parameter-declaration /// '...' [OBJC2] /// -Parser::StmtResult Parser::ParseObjCTryStmt(SourceLocation atLoc, - bool &processAtKeyword) { +Parser::StmtResult Parser::ParseObjCTryStmt(SourceLocation atLoc) { bool catch_or_finally_seen = false; - processAtKeyword = false; ConsumeToken(); // consume try if (Tok.isNot(tok::l_brace)) { @@ -1165,7 +1163,16 @@ Parser::StmtResult Parser::ParseObjCTryStmt(SourceLocation atLoc, StmtResult TryBody = ParseCompoundStatementBody(); if (TryBody.isInvalid) TryBody = Actions.ActOnNullStmt(Tok.getLocation()); + while (Tok.is(tok::at)) { + // At this point, we need to lookahead to determine if this @ is the start + // of an @catch or @finally. We don't want to consume the @ token if this + // is an @try or @encode or something else. 
+ Token AfterAt = GetLookAheadToken(1); + if (!AfterAt.isObjCAtKeyword(tok::objc_catch) && + !AfterAt.isObjCAtKeyword(tok::objc_finally)) + break; + SourceLocation AtCatchFinallyLoc = ConsumeToken(); if (Tok.isObjCAtKeyword(tok::objc_catch)) { StmtTy *FirstPart = 0; @@ -1203,7 +1210,8 @@ Parser::StmtResult Parser::ParseObjCTryStmt(SourceLocation atLoc, return true; } catch_or_finally_seen = true; - } else if (Tok.isObjCAtKeyword(tok::objc_finally)) { + } else { + assert(Tok.isObjCAtKeyword(tok::objc_finally) && "Lookahead confused?"); ConsumeToken(); // consume finally StmtResult FinallyBody(true); @@ -1217,9 +1225,6 @@ Parser::StmtResult Parser::ParseObjCTryStmt(SourceLocation atLoc, FinallyBody.Val); catch_or_finally_seen = true; break; - } else { - processAtKeyword = true; - break; } } if (!catch_or_finally_seen) { @@ -1274,18 +1279,7 @@ Parser::DeclTy *Parser::ParseObjCMethodDefinition() { Parser::StmtResult Parser::ParseObjCAtStatement(SourceLocation AtLoc) { if (Tok.isObjCAtKeyword(tok::objc_try)) { - bool parsedAtSign; - - StmtResult Res = ParseObjCTryStmt(AtLoc, parsedAtSign); - // FIXME: This hack results in a dropped AST node. To correctly implement - // the hack, parseAtSign would need to bubble up to - // ParseCompoundStatement(). This would involve adding an argument to this - // routine and ParseStatementOrDeclaration(). Changing the parser in this - // fashion to solve such a conceptually simple problem is undesirable. - // Rework this clause once 2-token lookahead is implemented. 
- if (!Res.isInvalid && parsedAtSign) - return ParseObjCAtStatement(AtLoc); - return Res; + return ParseObjCTryStmt(AtLoc); } else if (Tok.isObjCAtKeyword(tok::objc_throw)) return ParseObjCThrowStmt(AtLoc); else if (Tok.isObjCAtKeyword(tok::objc_synchronized)) diff --git a/include/clang/Lex/Preprocessor.h b/include/clang/Lex/Preprocessor.h index fd12946a46..717f865b60 100644 --- a/include/clang/Lex/Preprocessor.h +++ b/include/clang/Lex/Preprocessor.h @@ -230,11 +230,19 @@ public: void EnterMacro(Token &Identifier, MacroArgs *Args); /// EnterTokenStream - Add a "macro" context to the top of the include stack, - /// which will cause the lexer to start returning the specified tokens. Note - /// that these tokens will be re-macro-expanded when/if expansion is enabled. - /// This method assumes that the specified stream of tokens has a permanent - /// owner somewhere, so they do not need to be copied. - void EnterTokenStream(const Token *Toks, unsigned NumToks); + /// which will cause the lexer to start returning the specified tokens. + /// + /// If DisableMacroExpansion is true, tokens lexed from the token stream will + /// not be subject to further macro expansion. Otherwise, these tokens will + /// be re-macro-expanded when/if expansion is enabled. + /// + /// If OwnsTokens is false, this method assumes that the specified stream of + /// tokens has a permanent owner somewhere, so they do not need to be copied. + /// If it is true, it assumes the array of tokens is allocated with new[] and + /// must be freed. + /// + void EnterTokenStream(const Token *Toks, unsigned NumToks, + bool DisableMacroExpansion, bool OwnsTokens); /// RemoveTopOfLexerStack - Pop the current lexer/macro exp off the top of the /// lexer stack. This should only be used in situations where the current @@ -272,6 +280,17 @@ public: DisableMacroExpansion = OldVal; } + /// LookAhead - This peeks ahead N tokens and returns that token without + /// consuming any tokens. 
LookAhead(0) returns the next token that would be + /// returned by Lex(), LookAhead(1) returns the token after it, etc. This + /// returns normal tokens after phase 5. As such, it is equivalent to using + /// 'Lex', not 'LexUnexpandedToken'. + /// + /// NOTE: this is a relatively expensive method, so it should not be used in + /// common code paths if possible! + /// + Token LookAhead(unsigned N); + /// Diag - Forwarding function for diagnostics. This emits a diagnostic at /// the specified Token's location, translating the token's start /// position in the current buffer into a SourcePosition object for rendering. diff --git a/include/clang/Lex/TokenLexer.h b/include/clang/Lex/TokenLexer.h index 0ecb4ec45d..9e384347d1 100644 --- a/include/clang/Lex/TokenLexer.h +++ b/include/clang/Lex/TokenLexer.h @@ -68,6 +68,10 @@ class TokenLexer { /// definition, we don't make a copy of it. bool OwnsTokens : 1; + /// DisableMacroExpansion - This is true when tokens lexed from the TokenLexer + /// should not be subject to further macro expansion. + bool DisableMacroExpansion : 1; + TokenLexer(const TokenLexer&); // DO NOT IMPLEMENT void operator=(const TokenLexer&); // DO NOT IMPLEMENT public: @@ -85,14 +89,19 @@ public: /// Create a TokenLexer for the specified token stream. This does not /// take ownership of the specified token vector. - TokenLexer(const Token *TokArray, unsigned NumToks, Preprocessor &pp) + TokenLexer(const Token *TokArray, unsigned NumToks, bool DisableExpansion, + bool OwnsTokens, Preprocessor &pp) : Macro(0), ActualArgs(0), PP(pp), OwnsTokens(false) { - Init(TokArray, NumToks); + Init(TokArray, NumToks, DisableExpansion, OwnsTokens); } /// Init - Initialize this TokenLexer with the specified token stream. /// This does not take ownership of the specified token vector. + /// + /// DisableMacroExpansion is true when macro expansion of tokens lexed from + /// this stream should be disabled.
+ void Init(const Token *TokArray, unsigned NumToks, + bool DisableMacroExpansion, bool OwnsTokens); ~TokenLexer() { destroy(); } diff --git a/include/clang/Parse/Parser.h b/include/clang/Parse/Parser.h index 860e5d700f..962b3370b7 100644 --- a/include/clang/Parse/Parser.h +++ b/include/clang/Parse/Parser.h @@ -189,6 +189,22 @@ private: return L; } + /// GetLookAheadToken - This peeks ahead N tokens and returns that token + /// without consuming any tokens. GetLookAheadToken(0) returns 'Tok', + /// GetLookAheadToken(1) returns the token after Tok, etc. + /// + /// Note that this differs from the Preprocessor's LookAhead method, because + /// the Parser always has one token lexed that the preprocessor doesn't. + /// + /// NOTE: this is a relatively expensive method, so it should not be used in + /// common code paths if possible! + /// + Token GetLookAheadToken(unsigned N) { + if (N == 0 || Tok.is(tok::eof)) return Tok; + return PP.LookAhead(N-1); + } + + /// MatchRHSPunctuation - For punctuation with a LHS and RHS (e.g. '['/']'), /// this helper function matches and consumes the specified RHS token if /// present.
If not present, it emits the specified diagnostic indicating @@ -411,7 +427,7 @@ private: StmtResult ParseAsmStatement(bool &msAsm); StmtResult FuzzyParseMicrosoftAsmStatement(); StmtResult ParseObjCAtStatement(SourceLocation atLoc); - StmtResult ParseObjCTryStmt(SourceLocation atLoc, bool &processAtKeyword); + StmtResult ParseObjCTryStmt(SourceLocation atLoc); StmtResult ParseObjCThrowStmt(SourceLocation atLoc); StmtResult ParseObjCSynchronizedStmt(SourceLocation atLoc); bool ParseAsmOperandsOpt(llvm::SmallVectorImpl &Names, diff --git a/test/Parser/objc-try-catch-1.m b/test/Parser/objc-try-catch-1.m index 95ca5a18f4..ef4b94348d 100644 --- a/test/Parser/objc-try-catch-1.m +++ b/test/Parser/objc-try-catch-1.m @@ -2,6 +2,9 @@ void * proc(); +@interface NSConstantString +@end + @interface Frob @end @@ -40,13 +43,13 @@ void * foo() void bar() { @try {}// expected-error {{@try statment without a @catch and @finally clause}} - @"s" {} // expected-warning {{result unused}} expected-error {{expected ';'}} + @"s"; // expected-warning {{result unused}} } void baz() { @try {}// expected-error {{@try statment without a @catch and @finally clause}} - @try {}// expected-error {{undeclared identifier}} + @try {} @finally {} } -- 2.40.0