From: Manuel Klimek Date: Tue, 28 May 2013 11:55:06 +0000 (+0000) Subject: A first step towards giving format tokens pointer identity. X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=96e888b0dd3f114eee7e4455a74b8cb5f52e071d;p=clang A first step towards giving format tokens pointer identity. With this patch, we create all tokens in one go before parsing and pass an ArrayRef to the UnwrappedLineParser. The UnwrappedLineParser is switched to use pointer-to-token internally. The UnwrappedLineParser still copies the tokens into the UnwrappedLines. This will be fixed in an upcoming patch. git-svn-id: https://llvm.org/svn/llvm-project/cfe/trunk@182768 91177308-0d34-0410-b5e6-96231b3b80d8 --- diff --git a/lib/Format/Format.cpp b/lib/Format/Format.cpp index acc74496eb..c126249ef8 100644 --- a/lib/Format/Format.cpp +++ b/lib/Format/Format.cpp @@ -1087,60 +1087,73 @@ private: unsigned Count; }; -class LexerBasedFormatTokenSource : public FormatTokenSource { +class FormatTokenLexer { public: - LexerBasedFormatTokenSource(Lexer &Lex, SourceManager &SourceMgr) - : GreaterStashed(false), TrailingWhitespace(0), Lex(Lex), + FormatTokenLexer(Lexer &Lex, SourceManager &SourceMgr) + : FormatTok(NULL), GreaterStashed(false), TrailingWhitespace(0), Lex(Lex), SourceMgr(SourceMgr), IdentTable(Lex.getLangOpts()) { Lex.SetKeepWhitespaceMode(true); } - virtual FormatToken getNextToken() { + ArrayRef lex() { + assert(Tokens.empty()); + do { + Tokens.push_back(getNextToken()); + } while (Tokens.back()->Tok.isNot(tok::eof)); + return Tokens; + } + + IdentifierTable &getIdentTable() { return IdentTable; } + +private: + FormatToken *getNextToken() { if (GreaterStashed) { - FormatTok.NewlinesBefore = 0; + FormatTok = new (Allocator.Allocate()) FormatToken(*FormatTok); + FormatTok->NewlinesBefore = 0; SourceLocation GreaterLocation = - FormatTok.Tok.getLocation().getLocWithOffset(1); - FormatTok.WhitespaceRange = SourceRange(GreaterLocation, GreaterLocation); + FormatTok->Tok.getLocation().getLocWithOffset(1); + FormatTok->WhitespaceRange = + SourceRange(GreaterLocation, GreaterLocation); GreaterStashed = false; return FormatTok; } - FormatTok = FormatToken(); - Lex.LexFromRawLexer(FormatTok.Tok); - StringRef Text = rawTokenText(FormatTok.Tok); + FormatTok = new (Allocator.Allocate()) FormatToken; + Lex.LexFromRawLexer(FormatTok->Tok); + StringRef Text = rawTokenText(FormatTok->Tok); SourceLocation WhitespaceStart = - FormatTok.Tok.getLocation().getLocWithOffset(-TrailingWhitespace); + FormatTok->Tok.getLocation().getLocWithOffset(-TrailingWhitespace); if (SourceMgr.getFileOffset(WhitespaceStart) == 0) - FormatTok.IsFirst = true; + FormatTok->IsFirst = true; // Consume and record whitespace until we find a significant token. unsigned WhitespaceLength = TrailingWhitespace; - while (FormatTok.Tok.is(tok::unknown)) { + while (FormatTok->Tok.is(tok::unknown)) { unsigned Newlines = Text.count('\n'); if (Newlines > 0) - FormatTok.LastNewlineOffset = WhitespaceLength + Text.rfind('\n') + 1; + FormatTok->LastNewlineOffset = WhitespaceLength + Text.rfind('\n') + 1; unsigned EscapedNewlines = Text.count("\\\n"); - FormatTok.NewlinesBefore += Newlines; - FormatTok.HasUnescapedNewline |= EscapedNewlines != Newlines; - WhitespaceLength += FormatTok.Tok.getLength(); + FormatTok->NewlinesBefore += Newlines; + FormatTok->HasUnescapedNewline |= EscapedNewlines != Newlines; + WhitespaceLength += FormatTok->Tok.getLength(); - if (FormatTok.Tok.is(tok::eof)) { - FormatTok.WhitespaceRange = + if (FormatTok->Tok.is(tok::eof)) { + FormatTok->WhitespaceRange = SourceRange(WhitespaceStart, WhitespaceStart.getLocWithOffset(WhitespaceLength)); return FormatTok; } - Lex.LexFromRawLexer(FormatTok.Tok); - Text = rawTokenText(FormatTok.Tok); + Lex.LexFromRawLexer(FormatTok->Tok); + Text = rawTokenText(FormatTok->Tok); } // Now FormatTok is the next non-whitespace token. - FormatTok.TokenLength = Text.size(); + FormatTok->TokenLength = Text.size(); TrailingWhitespace = 0; - if (FormatTok.Tok.is(tok::comment)) { + if (FormatTok->Tok.is(tok::comment)) { TrailingWhitespace = Text.size() - Text.rtrim().size(); - FormatTok.TokenLength -= TrailingWhitespace; + FormatTok->TokenLength -= TrailingWhitespace; } // In case the token starts with escaped newlines, we want to @@ -1151,41 +1164,40 @@ public: // FIXME: Add a more explicit test. unsigned i = 0; while (i + 1 < Text.size() && Text[i] == '\\' && Text[i + 1] == '\n') { - // FIXME: ++FormatTok.NewlinesBefore is missing... + // FIXME: ++FormatTok->NewlinesBefore is missing... WhitespaceLength += 2; - FormatTok.TokenLength -= 2; + FormatTok->TokenLength -= 2; i += 2; } - if (FormatTok.Tok.is(tok::raw_identifier)) { + if (FormatTok->Tok.is(tok::raw_identifier)) { IdentifierInfo &Info = IdentTable.get(Text); - FormatTok.Tok.setIdentifierInfo(&Info); - FormatTok.Tok.setKind(Info.getTokenID()); + FormatTok->Tok.setIdentifierInfo(&Info); + FormatTok->Tok.setKind(Info.getTokenID()); } - if (FormatTok.Tok.is(tok::greatergreater)) { - FormatTok.Tok.setKind(tok::greater); - FormatTok.TokenLength = 1; + if (FormatTok->Tok.is(tok::greatergreater)) { + FormatTok->Tok.setKind(tok::greater); + FormatTok->TokenLength = 1; GreaterStashed = true; } - FormatTok.WhitespaceRange = SourceRange( + FormatTok->WhitespaceRange = SourceRange( WhitespaceStart, WhitespaceStart.getLocWithOffset(WhitespaceLength)); - FormatTok.TokenText = StringRef( - SourceMgr.getCharacterData(FormatTok.getStartOfNonWhitespace()), - FormatTok.TokenLength); + FormatTok->TokenText = StringRef( + SourceMgr.getCharacterData(FormatTok->getStartOfNonWhitespace()), + FormatTok->TokenLength); return FormatTok; } - IdentifierTable &getIdentTable() { return IdentTable; } - -private: - FormatToken FormatTok; + FormatToken *FormatTok; bool GreaterStashed; unsigned TrailingWhitespace; Lexer &Lex; SourceManager &SourceMgr; IdentifierTable IdentTable; + llvm::SpecificBumpPtrAllocator Allocator; + SmallVector Tokens; /// Returns the text of \c FormatTok. StringRef rawTokenText(Token &Tok) { @@ -1204,8 +1216,9 @@ public: virtual ~Formatter() {} tooling::Replacements format() { - LexerBasedFormatTokenSource Tokens(Lex, SourceMgr); - UnwrappedLineParser Parser(Style, Tokens, *this); + FormatTokenLexer Tokens(Lex, SourceMgr); + + UnwrappedLineParser Parser(Style, Tokens.lex(), *this); bool StructuralError = Parser.parse(); TokenAnnotator Annotator(Style, SourceMgr, Lex, Tokens.getIdentTable().get("in")); diff --git a/lib/Format/UnwrappedLineParser.cpp b/lib/Format/UnwrappedLineParser.cpp index aaed97c302..2d44cdb12f 100644 --- a/lib/Format/UnwrappedLineParser.cpp +++ b/lib/Format/UnwrappedLineParser.cpp @@ -21,6 +21,15 @@ namespace clang { namespace format { +class FormatTokenSource { +public: + virtual ~FormatTokenSource() {} + virtual FormatToken *getNextToken() = 0; + + virtual unsigned getPosition() = 0; + virtual FormatToken *setPosition(unsigned Position) = 0; +}; + class ScopedDeclarationState { public: ScopedDeclarationState(UnwrappedLine &Line, std::vector &Stack, @@ -44,11 +53,11 @@ private: class ScopedMacroState : public FormatTokenSource { public: ScopedMacroState(UnwrappedLine &Line, FormatTokenSource *&TokenSource, - FormatToken &ResetToken, bool &StructuralError) + FormatToken *&ResetToken, bool &StructuralError) : Line(Line), TokenSource(TokenSource), ResetToken(ResetToken), PreviousLineLevel(Line.Level), PreviousTokenSource(TokenSource), StructuralError(StructuralError), - PreviousStructuralError(StructuralError) { + PreviousStructuralError(StructuralError), Token(NULL) { TokenSource = this; Line.Level = 0; Line.InPPDirective = true; @@ -62,13 +71,13 @@ public: StructuralError = PreviousStructuralError; } - virtual FormatToken getNextToken() { + virtual FormatToken *getNextToken() { // The \c UnwrappedLineParser guards against this by never calling // \c getNextToken() after it has encountered the first eof token. assert(!eof()); Token = PreviousTokenSource->getNextToken(); if (eof()) - return createEOF(); + return getFakeEOF(); return Token; } @@ -76,30 +85,34 @@ public: return PreviousTokenSource->getPosition(); } - virtual FormatToken setPosition(unsigned Position) { + virtual FormatToken *setPosition(unsigned Position) { Token = PreviousTokenSource->setPosition(Position); return Token; } private: - bool eof() { return Token.HasUnescapedNewline; } - - FormatToken createEOF() { - FormatToken FormatTok; - FormatTok.Tok.startToken(); - FormatTok.Tok.setKind(tok::eof); - return FormatTok; + bool eof() { return Token && Token->HasUnescapedNewline; } + + FormatToken *getFakeEOF() { + static bool EOFInitialized = false; + static FormatToken FormatTok; + if (!EOFInitialized) { + FormatTok.Tok.startToken(); + FormatTok.Tok.setKind(tok::eof); + EOFInitialized = true; + } + return &FormatTok; } UnwrappedLine &Line; FormatTokenSource *&TokenSource; - FormatToken &ResetToken; + FormatToken *&ResetToken; unsigned PreviousLineLevel; FormatTokenSource *PreviousTokenSource; bool &StructuralError; bool PreviousStructuralError; - FormatToken Token; + FormatToken *Token; }; class ScopedLineState { @@ -135,10 +148,10 @@ private: class IndexedTokenSource : public FormatTokenSource { public: - IndexedTokenSource(ArrayRef Tokens) + IndexedTokenSource(ArrayRef Tokens) : Tokens(Tokens), Position(-1) {} - virtual FormatToken getNextToken() { + virtual FormatToken *getNextToken() { ++Position; return Tokens[Position]; } @@ -148,28 +161,23 @@ public: return Position; } - virtual FormatToken setPosition(unsigned P) { + virtual FormatToken *setPosition(unsigned P) { Position = P; return Tokens[Position]; } private: - ArrayRef Tokens; + ArrayRef Tokens; int Position; }; UnwrappedLineParser::UnwrappedLineParser(const FormatStyle &Style, - FormatTokenSource &Tokens, + ArrayRef Tokens, UnwrappedLineConsumer &Callback) : Line(new UnwrappedLine), MustBreakBeforeNextToken(false), - CurrentLines(&Lines), StructuralError(false), Style(Style), - Tokens(NULL), Callback(Callback) { - FormatToken Tok; - do { - Tok = Tokens.getNextToken(); - AllTokens.push_back(Tok); - } while (Tok.Tok.isNot(tok::eof)); - LBraces.resize(AllTokens.size(), BS_Unknown); + CurrentLines(&Lines), StructuralError(false), Style(Style), Tokens(NULL), + Callback(Callback), AllTokens(Tokens) { + LBraces.resize(Tokens.size(), BS_Unknown); } bool UnwrappedLineParser::parse() { @@ -201,7 +209,7 @@ void UnwrappedLineParser::parseFile() { void UnwrappedLineParser::parseLevel(bool HasOpeningBrace) { do { - switch (FormatTok.Tok.getKind()) { + switch (FormatTok->Tok.getKind()) { case tok::comment: nextToken(); addUnwrappedLine(); @@ -233,15 +241,15 @@ void UnwrappedLineParser::calculateBraceTypes() { // definitions, too. unsigned StoredPosition = Tokens->getPosition(); unsigned Position = StoredPosition; - FormatToken Tok = FormatTok; + FormatToken *Tok = FormatTok; // Keep a stack of positions of lbrace tokens. We will // update information about whether an lbrace starts a // braced init list or a different block during the loop. SmallVector LBraceStack; - assert(Tok.Tok.is(tok::l_brace)); + assert(Tok->Tok.is(tok::l_brace)); do { - FormatToken NextTok = Tokens->getNextToken(); - switch (Tok.Tok.getKind()) { + FormatToken *NextTok = Tokens->getNextToken(); + switch (Tok->Tok.getKind()) { case tok::l_brace: LBraceStack.push_back(Position); break; @@ -256,8 +264,8 @@ void UnwrappedLineParser::calculateBraceTypes() { // Thus, if the parent is a braced init list, we consider all // brace blocks inside it braced init list. That works good enough // for now, but we will need to fix it to correctly handle lambdas. - if (NextTok.Tok.is(tok::comma) || NextTok.Tok.is(tok::semi) || - NextTok.Tok.is(tok::r_paren) || NextTok.Tok.is(tok::l_brace)) + if (NextTok->Tok.is(tok::comma) || NextTok->Tok.is(tok::semi) || + NextTok->Tok.is(tok::r_paren) || NextTok->Tok.is(tok::l_brace)) LBraces[LBraceStack.back()] = BS_BracedInit; else LBraces[LBraceStack.back()] = BS_Block; @@ -279,7 +287,7 @@ void UnwrappedLineParser::calculateBraceTypes() { } Tok = NextTok; ++Position; - } while (Tok.Tok.isNot(tok::eof)); + } while (Tok->Tok.isNot(tok::eof)); // Assume other blocks for all unclosed opening braces. for (unsigned i = 0, e = LBraceStack.size(); i != e; ++i) { if (LBraces[LBraceStack[i]] == BS_Unknown) @@ -290,7 +298,7 @@ void UnwrappedLineParser::calculateBraceTypes() { void UnwrappedLineParser::parseBlock(bool MustBeDeclaration, unsigned AddLevels) { - assert(FormatTok.Tok.is(tok::l_brace) && "'{' expected"); + assert(FormatTok->Tok.is(tok::l_brace) && "'{' expected"); nextToken(); addUnwrappedLine(); @@ -300,7 +308,7 @@ void UnwrappedLineParser::parseBlock(bool MustBeDeclaration, Line->Level += AddLevels; parseLevel(/*HasOpeningBrace=*/ true); - if (!FormatTok.Tok.is(tok::r_brace)) { + if (!FormatTok->Tok.is(tok::r_brace)) { Line->Level -= AddLevels; StructuralError = true; return; @@ -311,16 +319,16 @@ void UnwrappedLineParser::parseBlock(bool MustBeDeclaration, } void UnwrappedLineParser::parsePPDirective() { - assert(FormatTok.Tok.is(tok::hash) && "'#' expected"); + assert(FormatTok->Tok.is(tok::hash) && "'#' expected"); ScopedMacroState MacroState(*Line, Tokens, FormatTok, StructuralError); nextToken(); - if (FormatTok.Tok.getIdentifierInfo() == NULL) { + if (FormatTok->Tok.getIdentifierInfo() == NULL) { parsePPUnknown(); return; } - switch (FormatTok.Tok.getIdentifierInfo()->getPPKeywordID()) { + switch (FormatTok->Tok.getIdentifierInfo()->getPPKeywordID()) { case tok::pp_define: parsePPDefine(); return; @@ -355,10 +363,10 @@ void UnwrappedLineParser::pushPPConditional() { void UnwrappedLineParser::parsePPIf() { nextToken(); - if ((FormatTok.Tok.isLiteral() && - StringRef(FormatTok.Tok.getLiteralData(), FormatTok.Tok.getLength()) == + if ((FormatTok->Tok.isLiteral() && + StringRef(FormatTok->Tok.getLiteralData(), FormatTok->Tok.getLength()) == "0") || - FormatTok.Tok.is(tok::kw_false)) { + FormatTok->Tok.is(tok::kw_false)) { PPStack.push_back(PP_Unreachable); } else { pushPPConditional(); @@ -391,14 +399,14 @@ void UnwrappedLineParser::parsePPEndIf() { void UnwrappedLineParser::parsePPDefine() { nextToken(); - if (FormatTok.Tok.getKind() != tok::identifier) { + if (FormatTok->Tok.getKind() != tok::identifier) { parsePPUnknown(); return; } nextToken(); - if (FormatTok.Tok.getKind() == tok::l_paren && - FormatTok.WhitespaceRange.getBegin() == - FormatTok.WhitespaceRange.getEnd()) { + if (FormatTok->Tok.getKind() == tok::l_paren && + FormatTok->WhitespaceRange.getBegin() == + FormatTok->WhitespaceRange.getEnd()) { parseParens(); } addUnwrappedLine(); @@ -447,15 +455,15 @@ bool tokenCanStartNewLine(clang::Token Tok) { } void UnwrappedLineParser::parseStructuralElement() { - assert(!FormatTok.Tok.is(tok::l_brace)); - switch (FormatTok.Tok.getKind()) { + assert(!FormatTok->Tok.is(tok::l_brace)); + switch (FormatTok->Tok.getKind()) { case tok::at: nextToken(); - if (FormatTok.Tok.is(tok::l_brace)) { + if (FormatTok->Tok.is(tok::l_brace)) { parseBracedList(); break; } - switch (FormatTok.Tok.getObjCKeywordID()) { + switch (FormatTok->Tok.getObjCKeywordID()) { case tok::objc_public: case tok::objc_protected: case tok::objc_package: @@ -482,7 +490,7 @@ void UnwrappedLineParser::parseStructuralElement() { return; case tok::kw_inline: nextToken(); - if (FormatTok.Tok.is(tok::kw_namespace)) { + if (FormatTok->Tok.is(tok::kw_namespace)) { parseNamespace(); return; } @@ -517,9 +525,9 @@ void UnwrappedLineParser::parseStructuralElement() { return; case tok::kw_extern: nextToken(); - if (FormatTok.Tok.is(tok::string_literal)) { + if (FormatTok->Tok.is(tok::string_literal)) { nextToken(); - if (FormatTok.Tok.is(tok::l_brace)) { + if (FormatTok->Tok.is(tok::l_brace)) { parseBlock(/*MustBeDeclaration=*/ true, 0); addUnwrappedLine(); return; @@ -531,10 +539,10 @@ void UnwrappedLineParser::parseStructuralElement() { break; } do { - switch (FormatTok.Tok.getKind()) { + switch (FormatTok->Tok.getKind()) { case tok::at: nextToken(); - if (FormatTok.Tok.is(tok::l_brace)) + if (FormatTok->Tok.is(tok::l_brace)) parseBracedList(); break; case tok::kw_enum: @@ -576,15 +584,15 @@ void UnwrappedLineParser::parseStructuralElement() { case tok::identifier: nextToken(); if (Line->Tokens.size() == 1) { - if (FormatTok.Tok.is(tok::colon)) { + if (FormatTok->Tok.is(tok::colon)) { parseLabel(); return; } // Recognize function-like macro usages without trailing semicolon. - if (FormatTok.Tok.is(tok::l_paren)) { + if (FormatTok->Tok.is(tok::l_paren)) { parseParens(); - if (FormatTok.HasUnescapedNewline && - tokenCanStartNewLine(FormatTok.Tok)) { + if (FormatTok->HasUnescapedNewline && + tokenCanStartNewLine(FormatTok->Tok)) { addUnwrappedLine(); return; } @@ -593,7 +601,7 @@ void UnwrappedLineParser::parseStructuralElement() { break; case tok::equal: nextToken(); - if (FormatTok.Tok.is(tok::l_brace)) { + if (FormatTok->Tok.is(tok::l_brace)) { parseBracedList(); } break; @@ -624,7 +632,7 @@ void UnwrappedLineParser::parseBracedList() { // FIXME: When we start to support lambdas, we'll want to parse them away // here, otherwise our bail-out scenarios below break. The better solution // might be to just implement a more or less complete expression parser. - switch (FormatTok.Tok.getKind()) { + switch (FormatTok->Tok.getKind()) { case tok::l_brace: if (!StartOfExpression) { // Probably a missing closing brace. Bail out. @@ -656,10 +664,10 @@ void UnwrappedLineParser::parseReturn() { nextToken(); do { - switch (FormatTok.Tok.getKind()) { + switch (FormatTok->Tok.getKind()) { case tok::l_brace: parseBracedList(); - if (FormatTok.Tok.isNot(tok::semi)) { + if (FormatTok->Tok.isNot(tok::semi)) { // Assume missing ';'. addUnwrappedLine(); return; @@ -684,10 +692,10 @@ void UnwrappedLineParser::parseReturn() { } void UnwrappedLineParser::parseParens() { - assert(FormatTok.Tok.is(tok::l_paren) && "'(' expected."); + assert(FormatTok->Tok.is(tok::l_paren) && "'(' expected."); nextToken(); do { - switch (FormatTok.Tok.getKind()) { + switch (FormatTok->Tok.getKind()) { case tok::l_paren: parseParens(); break; @@ -708,7 +716,7 @@ void UnwrappedLineParser::parseParens() { } case tok::at: nextToken(); - if (FormatTok.Tok.is(tok::l_brace)) + if (FormatTok->Tok.is(tok::l_brace)) parseBracedList(); break; default: @@ -719,12 +727,12 @@ void UnwrappedLineParser::parseParens() { } void UnwrappedLineParser::parseIfThenElse() { - assert(FormatTok.Tok.is(tok::kw_if) && "'if' expected"); + assert(FormatTok->Tok.is(tok::kw_if) && "'if' expected"); nextToken(); - if (FormatTok.Tok.is(tok::l_paren)) + if (FormatTok->Tok.is(tok::l_paren)) parseParens(); bool NeedsUnwrappedLine = false; - if (FormatTok.Tok.is(tok::l_brace)) { + if (FormatTok->Tok.is(tok::l_brace)) { parseBlock(/*MustBeDeclaration=*/ false); NeedsUnwrappedLine = true; } else { @@ -733,12 +741,12 @@ void UnwrappedLineParser::parseIfThenElse() { parseStructuralElement(); --Line->Level; } - if (FormatTok.Tok.is(tok::kw_else)) { + if (FormatTok->Tok.is(tok::kw_else)) { nextToken(); - if (FormatTok.Tok.is(tok::l_brace)) { + if (FormatTok->Tok.is(tok::l_brace)) { parseBlock(/*MustBeDeclaration=*/ false); addUnwrappedLine(); - } else if (FormatTok.Tok.is(tok::kw_if)) { + } else if (FormatTok->Tok.is(tok::kw_if)) { parseIfThenElse(); } else { addUnwrappedLine(); @@ -752,18 +760,18 @@ void UnwrappedLineParser::parseIfThenElse() { } void UnwrappedLineParser::parseNamespace() { - assert(FormatTok.Tok.is(tok::kw_namespace) && "'namespace' expected"); + assert(FormatTok->Tok.is(tok::kw_namespace) && "'namespace' expected"); nextToken(); - if (FormatTok.Tok.is(tok::identifier)) + if (FormatTok->Tok.is(tok::identifier)) nextToken(); - if (FormatTok.Tok.is(tok::l_brace)) { + if (FormatTok->Tok.is(tok::l_brace)) { if (Style.BreakBeforeBraces == FormatStyle::BS_Linux) addUnwrappedLine(); parseBlock(/*MustBeDeclaration=*/ true, 0); // Munch the semicolon after a namespace. This is more common than one would // think. Puttin the semicolon into its own line is very ugly. - if (FormatTok.Tok.is(tok::semi)) + if (FormatTok->Tok.is(tok::semi)) nextToken(); addUnwrappedLine(); } @@ -771,12 +779,12 @@ void UnwrappedLineParser::parseNamespace() { } void UnwrappedLineParser::parseForOrWhileLoop() { - assert((FormatTok.Tok.is(tok::kw_for) || FormatTok.Tok.is(tok::kw_while)) && + assert((FormatTok->Tok.is(tok::kw_for) || FormatTok->Tok.is(tok::kw_while)) && "'for' or 'while' expected"); nextToken(); - if (FormatTok.Tok.is(tok::l_paren)) + if (FormatTok->Tok.is(tok::l_paren)) parseParens(); - if (FormatTok.Tok.is(tok::l_brace)) { + if (FormatTok->Tok.is(tok::l_brace)) { parseBlock(/*MustBeDeclaration=*/ false); addUnwrappedLine(); } else { @@ -788,9 +796,9 @@ void UnwrappedLineParser::parseForOrWhileLoop() { } void UnwrappedLineParser::parseDoWhile() { - assert(FormatTok.Tok.is(tok::kw_do) && "'do' expected"); + assert(FormatTok->Tok.is(tok::kw_do) && "'do' expected"); nextToken(); - if (FormatTok.Tok.is(tok::l_brace)) { + if (FormatTok->Tok.is(tok::l_brace)) { parseBlock(/*MustBeDeclaration=*/ false); } else { addUnwrappedLine(); @@ -800,7 +808,7 @@ void UnwrappedLineParser::parseDoWhile() { } // FIXME: Add error handling. - if (!FormatTok.Tok.is(tok::kw_while)) { + if (!FormatTok->Tok.is(tok::kw_while)) { addUnwrappedLine(); return; } @@ -810,15 +818,15 @@ void UnwrappedLineParser::parseDoWhile() { } void UnwrappedLineParser::parseLabel() { - if (FormatTok.Tok.isNot(tok::colon)) + if (FormatTok->Tok.isNot(tok::colon)) return; nextToken(); unsigned OldLineLevel = Line->Level; if (Line->Level > 1 || (!Line->InPPDirective && Line->Level > 0)) --Line->Level; - if (CommentsBeforeNextToken.empty() && FormatTok.Tok.is(tok::l_brace)) { + if (CommentsBeforeNextToken.empty() && FormatTok->Tok.is(tok::l_brace)) { parseBlock(/*MustBeDeclaration=*/ false); - if (FormatTok.Tok.is(tok::kw_break)) + if (FormatTok->Tok.is(tok::kw_break)) parseStructuralElement(); // "break;" after "}" goes on the same line. } addUnwrappedLine(); @@ -826,20 +834,20 @@ void UnwrappedLineParser::parseLabel() { } void UnwrappedLineParser::parseCaseLabel() { - assert(FormatTok.Tok.is(tok::kw_case) && "'case' expected"); + assert(FormatTok->Tok.is(tok::kw_case) && "'case' expected"); // FIXME: fix handling of complex expressions here. do { nextToken(); - } while (!eof() && !FormatTok.Tok.is(tok::colon)); + } while (!eof() && !FormatTok->Tok.is(tok::colon)); parseLabel(); } void UnwrappedLineParser::parseSwitch() { - assert(FormatTok.Tok.is(tok::kw_switch) && "'switch' expected"); + assert(FormatTok->Tok.is(tok::kw_switch) && "'switch' expected"); nextToken(); - if (FormatTok.Tok.is(tok::l_paren)) + if (FormatTok->Tok.is(tok::l_paren)) parseParens(); - if (FormatTok.Tok.is(tok::l_brace)) { + if (FormatTok->Tok.is(tok::l_brace)) { parseBlock(/*MustBeDeclaration=*/ false, Style.IndentCaseLabels ? 2 : 1); addUnwrappedLine(); } else { @@ -853,30 +861,30 @@ void UnwrappedLineParser::parseSwitch() { void UnwrappedLineParser::parseAccessSpecifier() { nextToken(); // Otherwise, we don't know what it is, and we'd better keep the next token. - if (FormatTok.Tok.is(tok::colon)) + if (FormatTok->Tok.is(tok::colon)) nextToken(); addUnwrappedLine(); } void UnwrappedLineParser::parseEnum() { nextToken(); - if (FormatTok.Tok.is(tok::identifier) || - FormatTok.Tok.is(tok::kw___attribute) || - FormatTok.Tok.is(tok::kw___declspec)) { + if (FormatTok->Tok.is(tok::identifier) || + FormatTok->Tok.is(tok::kw___attribute) || + FormatTok->Tok.is(tok::kw___declspec)) { nextToken(); // We can have macros or attributes in between 'enum' and the enum name. - if (FormatTok.Tok.is(tok::l_paren)) { + if (FormatTok->Tok.is(tok::l_paren)) { parseParens(); } - if (FormatTok.Tok.is(tok::identifier)) + if (FormatTok->Tok.is(tok::identifier)) nextToken(); } - if (FormatTok.Tok.is(tok::l_brace)) { + if (FormatTok->Tok.is(tok::l_brace)) { nextToken(); addUnwrappedLine(); ++Line->Level; do { - switch (FormatTok.Tok.getKind()) { + switch (FormatTok->Tok.getKind()) { case tok::l_paren: parseParens(); break; @@ -902,18 +910,19 @@ void UnwrappedLineParser::parseEnum() { void UnwrappedLineParser::parseRecord() { nextToken(); - if (FormatTok.Tok.is(tok::identifier) || - FormatTok.Tok.is(tok::kw___attribute) || - FormatTok.Tok.is(tok::kw___declspec)) { + if (FormatTok->Tok.is(tok::identifier) || + FormatTok->Tok.is(tok::kw___attribute) || + FormatTok->Tok.is(tok::kw___declspec)) { nextToken(); // We can have macros or attributes in between 'class' and the class name. - if (FormatTok.Tok.is(tok::l_paren)) { + if (FormatTok->Tok.is(tok::l_paren)) { parseParens(); } // The actual identifier can be a nested name specifier, and in macros // it is often token-pasted. - while (FormatTok.Tok.is(tok::identifier) || - FormatTok.Tok.is(tok::coloncolon) || FormatTok.Tok.is(tok::hashhash)) + while (FormatTok->Tok.is(tok::identifier) || + FormatTok->Tok.is(tok::coloncolon) || + FormatTok->Tok.is(tok::hashhash)) nextToken(); // Note that parsing away template declarations here leads to incorrectly @@ -926,15 +935,15 @@ void UnwrappedLineParser::parseRecord() { // and thus rule out the record production in case there is no template // (this would still leave us with an ambiguity between template function // and class declarations). - if (FormatTok.Tok.is(tok::colon) || FormatTok.Tok.is(tok::less)) { - while (!eof() && FormatTok.Tok.isNot(tok::l_brace)) { - if (FormatTok.Tok.is(tok::semi)) + if (FormatTok->Tok.is(tok::colon) || FormatTok->Tok.is(tok::less)) { + while (!eof() && FormatTok->Tok.isNot(tok::l_brace)) { + if (FormatTok->Tok.is(tok::semi)) return; nextToken(); } } } - if (FormatTok.Tok.is(tok::l_brace)) { + if (FormatTok->Tok.is(tok::l_brace)) { if (Style.BreakBeforeBraces == FormatStyle::BS_Linux) addUnwrappedLine(); @@ -946,16 +955,16 @@ void UnwrappedLineParser::parseRecord() { } void UnwrappedLineParser::parseObjCProtocolList() { - assert(FormatTok.Tok.is(tok::less) && "'<' expected."); + assert(FormatTok->Tok.is(tok::less) && "'<' expected."); do nextToken(); - while (!eof() && FormatTok.Tok.isNot(tok::greater)); + while (!eof() && FormatTok->Tok.isNot(tok::greater)); nextToken(); // Skip '>'. } void UnwrappedLineParser::parseObjCUntilAtEnd() { do { - if (FormatTok.Tok.isObjCAtKeyword(tok::objc_end)) { + if (FormatTok->Tok.isObjCAtKeyword(tok::objc_end)) { nextToken(); addUnwrappedLine(); break; @@ -969,18 +978,18 @@ void UnwrappedLineParser::parseObjCInterfaceOrImplementation() { nextToken(); // interface name // @interface can be followed by either a base class, or a category. - if (FormatTok.Tok.is(tok::colon)) { + if (FormatTok->Tok.is(tok::colon)) { nextToken(); nextToken(); // base class name - } else if (FormatTok.Tok.is(tok::l_paren)) + } else if (FormatTok->Tok.is(tok::l_paren)) // Skip category, if present. parseParens(); - if (FormatTok.Tok.is(tok::less)) + if (FormatTok->Tok.is(tok::less)) parseObjCProtocolList(); // If instance variables are present, keep the '{' on the first line too. - if (FormatTok.Tok.is(tok::l_brace)) + if (FormatTok->Tok.is(tok::l_brace)) parseBlock(/*MustBeDeclaration=*/ true); // With instance variables, this puts '}' on its own line. Without instance @@ -994,11 +1003,11 @@ void UnwrappedLineParser::parseObjCProtocol() { nextToken(); nextToken(); // protocol name - if (FormatTok.Tok.is(tok::less)) + if (FormatTok->Tok.is(tok::less)) parseObjCProtocolList(); // Check for protocol declaration. - if (FormatTok.Tok.is(tok::semi)) { + if (FormatTok->Tok.is(tok::semi)) { nextToken(); return addUnwrappedLine(); } @@ -1034,15 +1043,15 @@ void UnwrappedLineParser::addUnwrappedLine() { } } -bool UnwrappedLineParser::eof() const { return FormatTok.Tok.is(tok::eof); } +bool UnwrappedLineParser::eof() const { return FormatTok->Tok.is(tok::eof); } void UnwrappedLineParser::flushComments(bool NewlineBeforeNext) { bool JustComments = Line->Tokens.empty(); - for (SmallVectorImpl::const_iterator + for (SmallVectorImpl::const_iterator I = CommentsBeforeNextToken.begin(), E = CommentsBeforeNextToken.end(); I != E; ++I) { - if (I->NewlinesBefore && JustComments) { + if ((*I)->NewlinesBefore && JustComments) { addUnwrappedLine(); } pushToken(*I); @@ -1056,7 +1065,7 @@ void UnwrappedLineParser::flushComments(bool NewlineBeforeNext) { void UnwrappedLineParser::nextToken() { if (eof()) return; - flushComments(FormatTok.NewlinesBefore > 0); + flushComments(FormatTok->NewlinesBefore > 0); pushToken(FormatTok); readToken(); } @@ -1065,8 +1074,8 @@ void UnwrappedLineParser::readToken() { bool CommentsInCurrentLine = true; do { FormatTok = Tokens->getNextToken(); - while (!Line->InPPDirective && FormatTok.Tok.is(tok::hash) && - (FormatTok.HasUnescapedNewline || FormatTok.IsFirst)) { + while (!Line->InPPDirective && FormatTok->Tok.is(tok::hash) && + (FormatTok->HasUnescapedNewline || FormatTok->IsFirst)) { // If there is an unfinished unwrapped line, we flush the preprocessor // directives only after that unwrapped line was finished later. bool SwitchToPreprocessorLines = @@ -1075,7 +1084,7 @@ void UnwrappedLineParser::readToken() { // Comments stored before the preprocessor directive need to be output // before the preprocessor directive, at the same level as the // preprocessor directive, as we consider them to apply to the directive. - flushComments(FormatTok.NewlinesBefore > 0); + flushComments(FormatTok->NewlinesBefore > 0); parsePPDirective(); } @@ -1084,9 +1093,9 @@ void UnwrappedLineParser::readToken() { continue; } - if (!FormatTok.Tok.is(tok::comment)) + if (!FormatTok->Tok.is(tok::comment)) return; - if (FormatTok.NewlinesBefore > 0 || FormatTok.IsFirst) { + if (FormatTok->NewlinesBefore > 0 || FormatTok->IsFirst) { CommentsInCurrentLine = false; } if (CommentsInCurrentLine) { @@ -1097,8 +1106,8 @@ void UnwrappedLineParser::readToken() { } while (!eof()); } -void UnwrappedLineParser::pushToken(const FormatToken &Tok) { - Line->Tokens.push_back(Tok); +void UnwrappedLineParser::pushToken(FormatToken *Tok) { + Line->Tokens.push_back(*Tok); if (MustBreakBeforeNextToken) { Line->Tokens.back().MustBreakBefore = true; MustBreakBeforeNextToken = false; diff --git a/lib/Format/UnwrappedLineParser.h b/lib/Format/UnwrappedLineParser.h index edac237a0b..03505ff69a 100644 --- a/lib/Format/UnwrappedLineParser.h +++ b/lib/Format/UnwrappedLineParser.h @@ -111,24 +111,11 @@ public: virtual void consumeUnwrappedLine(const UnwrappedLine &Line) = 0; }; -class FormatTokenSource { -public: - virtual ~FormatTokenSource() { - } - virtual FormatToken getNextToken() = 0; - - // FIXME: This interface will become an implementation detail of - // the UnwrappedLineParser once we switch to generate all tokens - // up-front. - virtual unsigned getPosition() { return 0; } - virtual FormatToken setPosition(unsigned Position) { - llvm_unreachable("Interface in transition; do not call!"); - } -}; +class FormatTokenSource; class UnwrappedLineParser { public: - UnwrappedLineParser(const FormatStyle &Style, FormatTokenSource &Tokens, + UnwrappedLineParser(const FormatStyle &Style, ArrayRef Tokens, UnwrappedLineConsumer &Callback); /// Returns true in case of a structural error. @@ -170,7 +157,7 @@ private: void nextToken(); void readToken(); void flushComments(bool NewlineBeforeNext); - void pushToken(const FormatToken &Tok); + void pushToken(FormatToken *Tok); void calculateBraceTypes(); void pushPPConditional(); @@ -190,8 +177,8 @@ private: // line as the previous token, or not. If not, they belong to the next token. // Since the next token might already be in a new unwrapped line, we need to // store the comments belonging to that token. - SmallVector CommentsBeforeNextToken; - FormatToken FormatTok; + SmallVector CommentsBeforeNextToken; + FormatToken *FormatTok; bool MustBreakBeforeNextToken; // The parsed lines. Only added to through \c CurrentLines. @@ -223,7 +210,7 @@ private: // FIXME: This is a temporary measure until we have reworked the ownership // of the format tokens. The goal is to have the actual tokens created and // owned outside of and handed into the UnwrappedLineParser. - SmallVector AllTokens; + ArrayRef AllTokens; // FIXME: Currently we cannot store attributes with tokens, as we treat // them as read-only; thus, we now store the brace state indexed by the