From ceadeb9d7861b4d48124b8fb0d2a77eed159d64c Mon Sep 17 00:00:00 2001 From: Daniel Jasper Date: Tue, 4 Nov 2014 12:41:02 +0000 Subject: [PATCH] clang-format: Use identifier table for keywords in other languages. Slightly easier to write, more efficient and prevents bugs by misspelling them. No functional changes intended. git-svn-id: https://llvm.org/svn/llvm-project/cfe/trunk@221259 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Format/ContinuationIndenter.cpp | 11 ++-- lib/Format/ContinuationIndenter.h | 7 ++- lib/Format/Format.cpp | 19 ++++--- lib/Format/FormatToken.h | 78 ++++++++++++++++++++++++----- lib/Format/TokenAnnotator.cpp | 50 +++++++++--------- lib/Format/TokenAnnotator.h | 7 ++- lib/Format/UnwrappedLineParser.cpp | 14 +++--- lib/Format/UnwrappedLineParser.h | 6 ++- 8 files changed, 127 insertions(+), 65 deletions(-) diff --git a/lib/Format/ContinuationIndenter.cpp b/lib/Format/ContinuationIndenter.cpp index f6fbbba440..adbd8a3d1d 100644 --- a/lib/Format/ContinuationIndenter.cpp +++ b/lib/Format/ContinuationIndenter.cpp @@ -57,12 +57,13 @@ static bool startsNextParameter(const FormatToken &Current, } ContinuationIndenter::ContinuationIndenter(const FormatStyle &Style, + const AdditionalKeywords &Keywords, SourceManager &SourceMgr, WhitespaceManager &Whitespaces, encoding::Encoding Encoding, bool BinPackInconclusiveFunctions) - : Style(Style), SourceMgr(SourceMgr), Whitespaces(Whitespaces), - Encoding(Encoding), + : Style(Style), Keywords(Keywords), SourceMgr(SourceMgr), + Whitespaces(Whitespaces), Encoding(Encoding), BinPackInconclusiveFunctions(BinPackInconclusiveFunctions), CommentPragmasRegex(Style.CommentPragmas) {} @@ -507,8 +508,8 @@ unsigned ContinuationIndenter::getNewLineColumn(const LineState &State) { NextNonComment = &Current; // Java specific bits. 
- if (Style.Language == FormatStyle::LK_Java && Current.is(tok::identifier) && - (Current.TokenText == "implements" || Current.TokenText == "extends")) + if (Style.Language == FormatStyle::LK_Java && + Current.isOneOf(Keywords.kw_implements, Keywords.kw_extends)) return std::max(State.Stack.back().LastSpace, State.Stack.back().Indent + Style.ContinuationIndentWidth); @@ -673,7 +674,7 @@ unsigned ContinuationIndenter::moveStateToNextToken(LineState &State, } State.Stack[State.Stack.size() - 2].JSFunctionInlined = false; } - if (Current.TokenText == "function") + if (Current.is(Keywords.kw_function)) State.Stack.back().JSFunctionInlined = !Newline && Previous && Previous->Type != TT_DictLiteral && Previous->Type != TT_ConditionalExpr && diff --git a/lib/Format/ContinuationIndenter.h b/lib/Format/ContinuationIndenter.h index 0f059fbd15..5abbe7e9a2 100644 --- a/lib/Format/ContinuationIndenter.h +++ b/lib/Format/ContinuationIndenter.h @@ -17,6 +17,7 @@ #define LLVM_CLANG_LIB_FORMAT_CONTINUATIONINDENTER_H #include "Encoding.h" +#include "FormatToken.h" #include "clang/Format/Format.h" #include "llvm/Support/Regex.h" @@ -35,8 +36,9 @@ class ContinuationIndenter { public: /// \brief Constructs a \c ContinuationIndenter to format \p Line starting in /// column \p FirstIndent. 
- ContinuationIndenter(const FormatStyle &Style, SourceManager &SourceMgr, - WhitespaceManager &Whitespaces, + ContinuationIndenter(const FormatStyle &Style, + const AdditionalKeywords &Keywords, + SourceManager &SourceMgr, WhitespaceManager &Whitespaces, encoding::Encoding Encoding, bool BinPackInconclusiveFunctions); @@ -134,6 +136,7 @@ private: bool nextIsMultilineString(const LineState &State); FormatStyle Style; + const AdditionalKeywords &Keywords; SourceManager &SourceMgr; WhitespaceManager &Whitespaces; encoding::Encoding Encoding; diff --git a/lib/Format/Format.cpp b/lib/Format/Format.cpp index 300bd626ed..1eb6d97fc5 100644 --- a/lib/Format/Format.cpp +++ b/lib/Format/Format.cpp @@ -1334,10 +1334,10 @@ public: FormatTokenLexer(SourceManager &SourceMgr, FileID ID, FormatStyle &Style, encoding::Encoding Encoding) : FormatTok(nullptr), IsFirstToken(true), GreaterStashed(false), - Column(0), TrailingWhitespace(0), - SourceMgr(SourceMgr), ID(ID), Style(Style), - IdentTable(getFormattingLangOpts(Style)), Encoding(Encoding), - FirstInLineIndex(0), FormattingDisabled(false) { + Column(0), TrailingWhitespace(0), SourceMgr(SourceMgr), ID(ID), + Style(Style), IdentTable(getFormattingLangOpts(Style)), + Keywords(IdentTable), Encoding(Encoding), FirstInLineIndex(0), + FormattingDisabled(false) { Lex.reset(new Lexer(ID, SourceMgr.getBuffer(ID), SourceMgr, getFormattingLangOpts(Style))); Lex->SetKeepWhitespaceMode(true); @@ -1359,7 +1359,7 @@ public: return Tokens; } - IdentifierTable &getIdentTable() { return IdentTable; } + const AdditionalKeywords &getKeywords() { return Keywords; } private: void tryMergePreviousTokens() { @@ -1722,6 +1722,7 @@ private: FileID ID; FormatStyle &Style; IdentifierTable IdentTable; + AdditionalKeywords Keywords; encoding::Encoding Encoding; llvm::SpecificBumpPtrAllocator<FormatToken> Allocator; // Index (in 'Tokens') of the last token that starts a new line. 
@@ -1805,7 +1806,8 @@ public: tooling::Replacements Result; FormatTokenLexer Tokens(SourceMgr, ID, Style, Encoding); - UnwrappedLineParser Parser(Style, Tokens.lex(), *this); + UnwrappedLineParser Parser(Style, Tokens.getKeywords(), Tokens.lex(), + *this); bool StructuralError = Parser.parse(); assert(UnwrappedLines.rbegin()->empty()); for (unsigned Run = 0, RunE = UnwrappedLines.size(); Run + 1 != RunE; @@ -1836,7 +1838,7 @@ public: tooling::Replacements format(SmallVectorImpl<AnnotatedLine *> &AnnotatedLines, bool StructuralError, FormatTokenLexer &Tokens) { - TokenAnnotator Annotator(Style, Tokens.getIdentTable().get("in")); + TokenAnnotator Annotator(Style, Tokens.getKeywords()); for (unsigned i = 0, e = AnnotatedLines.size(); i != e; ++i) { Annotator.annotate(*AnnotatedLines[i]); } @@ -1847,7 +1849,8 @@ public: computeAffectedLines(AnnotatedLines.begin(), AnnotatedLines.end()); Annotator.setCommentLineLevels(AnnotatedLines); - ContinuationIndenter Indenter(Style, SourceMgr, Whitespaces, Encoding, + ContinuationIndenter Indenter(Style, Tokens.getKeywords(), SourceMgr, + Whitespaces, Encoding, BinPackInconclusiveFunctions); UnwrappedLineFormatter Formatter(&Indenter, &Whitespaces, Style); Formatter.format(AnnotatedLines, /*DryRun=*/false); diff --git a/lib/Format/FormatToken.h b/lib/Format/FormatToken.h index aca529809c..c2cc385602 100644 --- a/lib/Format/FormatToken.h +++ b/lib/Format/FormatToken.h @@ -16,6 +16,7 @@ #ifndef LLVM_CLANG_LIB_FORMAT_FORMATTOKEN_H #define LLVM_CLANG_LIB_FORMAT_FORMATTOKEN_H +#include "clang/Basic/IdentifierTable.h" #include "clang/Basic/OperatorPrecedence.h" #include "clang/Format/Format.h" #include "clang/Lex/Lexer.h" @@ -270,28 +271,35 @@ struct FormatToken { bool is(tok::TokenKind Kind) const { return Tok.is(Kind); } - bool isOneOf(tok::TokenKind K1, tok::TokenKind K2) const { + bool is(const IdentifierInfo *II) const { + return II && II == Tok.getIdentifierInfo(); + } + + template <typename T> + bool isOneOf(T K1, T K2) const { return is(K1) || is(K2); } - 
bool isOneOf(tok::TokenKind K1, tok::TokenKind K2, tok::TokenKind K3) const { + template <typename T> + bool isOneOf(T K1, T K2, T K3) const { return is(K1) || is(K2) || is(K3); } - bool isOneOf(tok::TokenKind K1, tok::TokenKind K2, tok::TokenKind K3, - tok::TokenKind K4, tok::TokenKind K5 = tok::NUM_TOKENS, - tok::TokenKind K6 = tok::NUM_TOKENS, - tok::TokenKind K7 = tok::NUM_TOKENS, - tok::TokenKind K8 = tok::NUM_TOKENS, - tok::TokenKind K9 = tok::NUM_TOKENS, - tok::TokenKind K10 = tok::NUM_TOKENS, - tok::TokenKind K11 = tok::NUM_TOKENS, - tok::TokenKind K12 = tok::NUM_TOKENS) const { + template <typename T> + bool isOneOf(T K1, T K2, T K3, T K4, T K5 = tok::NUM_TOKENS, + T K6 = tok::NUM_TOKENS, T K7 = tok::NUM_TOKENS, + T K8 = tok::NUM_TOKENS, T K9 = tok::NUM_TOKENS, + T K10 = tok::NUM_TOKENS, T K11 = tok::NUM_TOKENS, + T K12 = tok::NUM_TOKENS) const { return is(K1) || is(K2) || is(K3) || is(K4) || is(K5) || is(K6) || is(K7) || is(K8) || is(K9) || is(K10) || is(K11) || is(K12); } - bool isNot(tok::TokenKind Kind) const { return Tok.isNot(Kind); } + template <typename T> + bool isNot(T Kind) const { + return Tok.isNot(Kind); + } + bool isStringLiteral() const { return tok::isStringLiteral(Tok.getKind()); } bool isObjCAtKeyword(tok::ObjCKeywordKind Kind) const { @@ -523,6 +531,52 @@ private: bool HasNestedBracedList; }; +/// \brief Encapsulates keywords that are context sensitive or for languages not +/// properly supported by Clang's lexer. 
+struct AdditionalKeywords { + AdditionalKeywords(IdentifierTable &IdentTable) { + kw_in = &IdentTable.get("in"); + kw_NS_ENUM = &IdentTable.get("NS_ENUM"); + + kw_finally = &IdentTable.get("finally"); + kw_function = &IdentTable.get("function"); + kw_var = &IdentTable.get("var"); + + kw_extends = &IdentTable.get("extends"); + kw_implements = &IdentTable.get("implements"); + kw_synchronized = &IdentTable.get("synchronized"); + kw_throws = &IdentTable.get("throws"); + + kw_option = &IdentTable.get("option"); + kw_optional = &IdentTable.get("optional"); + kw_repeated = &IdentTable.get("repeated"); + kw_required = &IdentTable.get("required"); + kw_returns = &IdentTable.get("returns"); + } + + // ObjC context sensitive keywords. + IdentifierInfo *kw_in; + IdentifierInfo *kw_NS_ENUM; + + // JavaScript keywords. + IdentifierInfo *kw_finally; + IdentifierInfo *kw_function; + IdentifierInfo *kw_var; + + // Java keywords. + IdentifierInfo *kw_extends; + IdentifierInfo *kw_implements; + IdentifierInfo *kw_synchronized; + IdentifierInfo *kw_throws; + + // Proto keywords. 
+ IdentifierInfo *kw_option; + IdentifierInfo *kw_optional; + IdentifierInfo *kw_repeated; + IdentifierInfo *kw_required; + IdentifierInfo *kw_returns; +}; + } // namespace format } // namespace clang diff --git a/lib/Format/TokenAnnotator.cpp b/lib/Format/TokenAnnotator.cpp index c426bed316..d00e648706 100644 --- a/lib/Format/TokenAnnotator.cpp +++ b/lib/Format/TokenAnnotator.cpp @@ -32,9 +32,9 @@ namespace { class AnnotatingParser { public: AnnotatingParser(const FormatStyle &Style, AnnotatedLine &Line, - IdentifierInfo &Ident_in) + const AdditionalKeywords &Keywords) : Style(Style), Line(Line), CurrentToken(Line.First), - KeywordVirtualFound(false), AutoFound(false), Ident_in(Ident_in) { + KeywordVirtualFound(false), AutoFound(false), Keywords(Keywords) { Contexts.push_back(Context(tok::unknown, 1, /*IsExpression=*/false)); resetTokenMetadata(CurrentToken); } @@ -353,7 +353,7 @@ private: if (Current->Type == TT_LambdaLSquare || (Current->is(tok::caret) && Current->Type == TT_UnaryOperator) || (Style.Language == FormatStyle::LK_JavaScript && - Current->TokenText == "function")) { + Current->is(Keywords.kw_function))) { ++Left->BlockParameterCount; } if (Current->is(tok::comma)) { @@ -513,8 +513,7 @@ private: parseTemplateDeclaration(); break; case tok::identifier: - if (Line.First->is(tok::kw_for) && - Tok->Tok.getIdentifierInfo() == &Ident_in) + if (Line.First->is(tok::kw_for) && Tok->is(Keywords.kw_in)) Tok->Type = TT_ObjCForIn; break; case tok::comma: @@ -909,7 +908,7 @@ private: Tok.Previous->Type == TT_TemplateCloser || Tok.Previous->isSimpleTypeSpecifier(); if (Style.Language == FormatStyle::LK_JavaScript && Tok.Next && - Tok.Next->TokenText == "in") + Tok.Next->is(Keywords.kw_in)) return false; bool ParensCouldEndDecl = Tok.Next && Tok.Next->isOneOf(tok::equal, tok::semi, tok::l_brace); @@ -1061,7 +1060,7 @@ private: FormatToken *CurrentToken; bool KeywordVirtualFound; bool AutoFound; - IdentifierInfo &Ident_in; + const AdditionalKeywords &Keywords; }; 
static int PrecedenceUnaryOperator = prec::PointerToMember + 1; @@ -1071,8 +1070,9 @@ static int PrecedenceArrowAndPeriod = prec::PointerToMember + 2; /// operator precedence. class ExpressionParser { public: - ExpressionParser(const FormatStyle &Style, AnnotatedLine &Line) - : Style(Style), Current(Line.First) {} + ExpressionParser(const FormatStyle &Style, const AdditionalKeywords &Keywords, + AnnotatedLine &Line) + : Style(Style), Keywords(Keywords), Current(Line.First) {} /// \brief Parse expressions with the given operatore precedence. void parse(int Precedence = 0) { @@ -1176,9 +1176,7 @@ private: else if (Current->isOneOf(tok::period, tok::arrow)) return PrecedenceArrowAndPeriod; else if (Style.Language == FormatStyle::LK_Java && - Current->is(tok::identifier) && - (Current->TokenText == "extends" || - Current->TokenText == "implements")) + Current->isOneOf(Keywords.kw_extends, Keywords.kw_implements)) return 0; } return -1; @@ -1238,6 +1236,7 @@ private: } const FormatStyle &Style; + const AdditionalKeywords &Keywords; FormatToken *Current; }; @@ -1265,12 +1264,12 @@ void TokenAnnotator::annotate(AnnotatedLine &Line) { I != E; ++I) { annotate(**I); } - AnnotatingParser Parser(Style, Line, Ident_in); + AnnotatingParser Parser(Style, Line, Keywords); Line.Type = Parser.parseLine(); if (Line.Type == LT_Invalid) return; - ExpressionParser ExprParser(Style, Line); + ExpressionParser ExprParser(Style, Keywords, Line); ExprParser.parse(); if (Line.First->Type == TT_ObjCMethodSpecifier) @@ -1476,7 +1475,8 @@ unsigned TokenAnnotator::splitPenalty(const AnnotatedLine &Line, if (Left.Type == TT_LeadingJavaAnnotation) return 1; - if (Style.Language == FormatStyle::LK_Java && Right.TokenText == "implements") + if (Style.Language == FormatStyle::LK_Java && + Right.is(Keywords.kw_implements)) return 2; if (Right.isMemberAccess()) { @@ -1673,17 +1673,17 @@ bool TokenAnnotator::spaceRequiredBefore(const AnnotatedLine &Line, const FormatToken &Left = *Right.Previous; if 
(Style.Language == FormatStyle::LK_Proto) { if (Right.is(tok::period) && - (Left.TokenText == "optional" || Left.TokenText == "required" || - Left.TokenText == "repeated")) + Left.isOneOf(Keywords.kw_optional, Keywords.kw_required, + Keywords.kw_repeated)) return true; if (Right.is(tok::l_paren) && - (Left.TokenText == "returns" || Left.TokenText == "option")) + Left.isOneOf(Keywords.kw_returns, Keywords.kw_option)) return true; } else if (Style.Language == FormatStyle::LK_JavaScript) { - if (Left.TokenText == "var") + if (Left.is(Keywords.kw_var)) return true; } else if (Style.Language == FormatStyle::LK_Java) { - if (Left.TokenText == "synchronized" && Right.is(tok::l_paren)) + if (Left.is(Keywords.kw_synchronized) && Right.is(tok::l_paren)) return Style.SpaceBeforeParens != FormatStyle::SBPO_Never; if (Left.is(tok::kw_static) && Right.Type == TT_TemplateOpener) return true; @@ -1858,13 +1858,11 @@ bool TokenAnnotator::canBreakBefore(const AnnotatedLine &Line, const FormatToken &Left = *Right.Previous; if (Style.Language == FormatStyle::LK_Java) { - if (Left.is(tok::identifier) && - (Left.TokenText == "throws" || Left.TokenText == "extends" || - Left.TokenText == "implements")) + if (Left.isOneOf(Keywords.kw_throws, Keywords.kw_extends, + Keywords.kw_implements)) return false; - if (Right.is(tok::identifier) && - (Right.TokenText == "throws" || Right.TokenText == "extends" || - Right.TokenText == "implements")) + if (Right.isOneOf(Keywords.kw_throws, Keywords.kw_extends, + Keywords.kw_implements)) return true; } diff --git a/lib/Format/TokenAnnotator.h b/lib/Format/TokenAnnotator.h index 88adc7503d..fc4d1d66e7 100644 --- a/lib/Format/TokenAnnotator.h +++ b/lib/Format/TokenAnnotator.h @@ -108,8 +108,8 @@ private: /// \c UnwrappedLine. 
class TokenAnnotator { public: - TokenAnnotator(const FormatStyle &Style, IdentifierInfo &Ident_in) - : Style(Style), Ident_in(Ident_in) {} + TokenAnnotator(const FormatStyle &Style, const AdditionalKeywords &Keywords) + : Style(Style), Keywords(Keywords) {} /// \brief Adapts the indent levels of comment lines to the indent of the /// subsequent line. @@ -139,8 +139,7 @@ private: const FormatStyle &Style; - // Contextual keywords: - IdentifierInfo &Ident_in; + const AdditionalKeywords &Keywords; }; } // end namespace format diff --git a/lib/Format/UnwrappedLineParser.cpp b/lib/Format/UnwrappedLineParser.cpp index 0c8d486f0e..0b37acbebf 100644 --- a/lib/Format/UnwrappedLineParser.cpp +++ b/lib/Format/UnwrappedLineParser.cpp @@ -202,12 +202,13 @@ private: } // end anonymous namespace UnwrappedLineParser::UnwrappedLineParser(const FormatStyle &Style, + const AdditionalKeywords &Keywords, ArrayRef<FormatToken *> Tokens, UnwrappedLineConsumer &Callback) : Line(new UnwrappedLine), MustBreakBeforeNextToken(false), CurrentLines(&Lines), StructuralError(false), Style(Style), - Tokens(nullptr), Callback(Callback), AllTokens(Tokens), - PPBranchLevel(-1) {} + Keywords(Keywords), Tokens(nullptr), Callback(Callback), + AllTokens(Tokens), PPBranchLevel(-1) {} void UnwrappedLineParser::reset() { PPBranchLevel = -1; @@ -746,8 +747,7 @@ void UnwrappedLineParser::parseStructuralElement() { break; case tok::kw_typedef: nextToken(); - // FIXME: Use the IdentifierTable instead. - if (FormatTok->TokenText == "NS_ENUM") + if (FormatTok->is(Keywords.kw_NS_ENUM)) parseEnum(); break; case tok::kw_struct: @@ -977,7 +977,7 @@ bool UnwrappedLineParser::parseBracedList(bool ContinueOnSemicolons) { // replace this by using parseAssigmentExpression() inside. 
do { if (Style.Language == FormatStyle::LK_JavaScript && - FormatTok->TokenText == "function") { + FormatTok->is(Keywords.kw_function)) { tryToParseJSFunction(); continue; } @@ -1047,7 +1047,7 @@ void UnwrappedLineParser::parseParens() { break; case tok::identifier: if (Style.Language == FormatStyle::LK_JavaScript && - FormatTok->TokenText == "function") + FormatTok->is(Keywords.kw_function)) tryToParseJSFunction(); else nextToken(); @@ -1177,7 +1177,7 @@ void UnwrappedLineParser::parseTryCatch() { while (FormatTok->is(tok::kw_catch) || ((Style.Language == FormatStyle::LK_Java || Style.Language == FormatStyle::LK_JavaScript) && - FormatTok->TokenText == "finally")) { + FormatTok->is(Keywords.kw_finally))) { nextToken(); while (FormatTok->isNot(tok::l_brace)) { if (FormatTok->is(tok::l_paren)) { diff --git a/lib/Format/UnwrappedLineParser.h b/lib/Format/UnwrappedLineParser.h index d2f17e015c..f047540cf5 100644 --- a/lib/Format/UnwrappedLineParser.h +++ b/lib/Format/UnwrappedLineParser.h @@ -60,7 +60,9 @@ class FormatTokenSource; class UnwrappedLineParser { public: - UnwrappedLineParser(const FormatStyle &Style, ArrayRef<FormatToken *> Tokens, + UnwrappedLineParser(const FormatStyle &Style, + const AdditionalKeywords &Keywords, + ArrayRef<FormatToken *> Tokens, UnwrappedLineConsumer &Callback); /// Returns true in case of a structural error. @@ -158,6 +160,8 @@ private: bool StructuralError; const FormatStyle &Style; + const AdditionalKeywords &Keywords; + FormatTokenSource *Tokens; UnwrappedLineConsumer &Callback; -- 2.40.0