From: Daniel Jasper Date: Mon, 14 Jan 2013 13:08:07 +0000 (+0000) Subject: Refactor datastructure used in clang-format. X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=995e8207877f6f5c8ed7d63f9b61e156dd171e50;p=clang Refactor datastructure used in clang-format. Main difference, add an AnnotatedLine class to hold information about a line while formatting. At the same time degrade the UnwrappedLine class to a class solely used for communicating between the UnwrappedLineParser and the Formatter. No functional changes intended. git-svn-id: https://llvm.org/svn/llvm-project/cfe/trunk@172403 91177308-0d34-0410-b5e6-96231b3b80d8 --- diff --git a/lib/Format/Format.cpp b/lib/Format/Format.cpp index 57e9a76989..ad3f97c46b 100644 --- a/lib/Format/Format.cpp +++ b/lib/Format/Format.cpp @@ -93,6 +93,19 @@ public: AnnotatedToken *Parent; }; +class AnnotatedLine { +public: + AnnotatedLine(const FormatToken &FormatTok, unsigned Level, + bool InPPDirective) + : First(FormatTok), Level(Level), InPPDirective(InPPDirective) {} + AnnotatedToken First; + AnnotatedToken *Last; + + LineType Type; + unsigned Level; + bool InPPDirective; +}; + static prec::Level getPrecedence(const AnnotatedToken &Tok) { return getBinOpPrecedence(Tok.FormatTok.Tok.getKind(), true, true); } @@ -168,11 +181,13 @@ static void replacePPWhitespace( NewLineText + std::string(Spaces, ' '))); } -/// \brief Checks whether the (remaining) \c UnwrappedLine starting with -/// \p RootToken fits into \p Limit columns. -static bool fitsIntoLimit(const AnnotatedToken &RootToken, unsigned Limit) { +/// \brief Calculates whether the (remaining) \c AnnotatedLine starting with +/// \p RootToken fits into \p Limit columns on a single line. +/// +/// If true, sets \p Length to the required length. +static bool fitsIntoLimit(const AnnotatedToken &RootToken, + unsigned Limit, unsigned* Length = 0) { unsigned Columns = RootToken.FormatTok.TokenLength; - bool FitsOnALine = true; const AnnotatedToken *Tok = &RootToken; while (!Tok->Children.empty()) { Tok = &Tok->Children[0]; @@ -181,11 +196,13 @@ static bool fitsIntoLimit(const AnnotatedToken &RootToken, unsigned Limit) { // needs to be put on a new line if the line needs to be split. if (Columns > Limit || (Tok->MustBreakBefore && Tok->Type != TT_CtorInitializerColon)) { - FitsOnALine = false; - break; + // FIXME: Remove this hack. + return false; } } - return FitsOnALine; + if (Length != 0) + *Length = Columns; + return true; } /// \brief Returns if a token is an Objective-C selector name. @@ -200,7 +217,7 @@ static bool isObjCSelectorName(const AnnotatedToken &Tok) { class UnwrappedLineFormatter { public: UnwrappedLineFormatter(const FormatStyle &Style, SourceManager &SourceMgr, - const UnwrappedLine &Line, unsigned FirstIndent, + const AnnotatedLine &Line, unsigned FirstIndent, bool FitsOnALine, const AnnotatedToken &RootToken, tooling::Replacements &Replaces, bool StructuralError) : Style(Style), SourceMgr(SourceMgr), Line(Line), @@ -599,7 +616,7 @@ private: FormatStyle Style; SourceManager &SourceMgr; - const UnwrappedLine &Line; + const AnnotatedLine &Line; const unsigned FirstIndent; const bool FitsOnALine; const AnnotatedToken &RootToken; @@ -616,10 +633,9 @@ private: /// \c UnwrappedLine. class TokenAnnotator { public: - TokenAnnotator(const UnwrappedLine &Line, const FormatStyle &Style, - SourceManager &SourceMgr, Lexer &Lex) - : Style(Style), SourceMgr(SourceMgr), Lex(Lex), - RootToken(Line.RootToken) {} + TokenAnnotator(const FormatStyle &Style, SourceManager &SourceMgr, Lexer &Lex, + AnnotatedLine &Line) + : Style(Style), SourceMgr(SourceMgr), Lex(Lex), Line(Line) {} /// \brief A parser that gathers additional information about tokens. /// @@ -888,7 +904,9 @@ public: }; void createAnnotatedTokens(AnnotatedToken &Current) { - if (!Current.FormatTok.Children.empty()) { + if (Current.FormatTok.Children.empty()) { + Line.Last = &Current; + } else { Current.Children.push_back(AnnotatedToken(Current.FormatTok.Children[0])); Current.Children.back().Parent = &Current; createAnnotatedTokens(Current.Children.back()); @@ -917,34 +935,30 @@ public: calculateExtraInformation(Current.Children[0]); } - bool annotate() { - createAnnotatedTokens(RootToken); - - AnnotatingParser Parser(RootToken); - CurrentLineType = Parser.parseLine(); - if (CurrentLineType == LT_Invalid) - return false; + void annotate() { + Line.Last = &Line.First; + createAnnotatedTokens(Line.First); - determineTokenTypes(RootToken, /*IsRHS=*/false); + AnnotatingParser Parser(Line.First); + Line.Type = Parser.parseLine(); + if (Line.Type == LT_Invalid) + return; - if (RootToken.Type == TT_ObjCMethodSpecifier) - CurrentLineType = LT_ObjCMethodDecl; - else if (RootToken.Type == TT_ObjCDecl) - CurrentLineType = LT_ObjCDecl; - else if (RootToken.Type == TT_ObjCProperty) - CurrentLineType = LT_ObjCProperty; + determineTokenTypes(Line.First, /*IsRHS=*/false); - if (!RootToken.Children.empty()) - calculateExtraInformation(RootToken.Children[0]); - return true; - } + if (Line.First.Type == TT_ObjCMethodSpecifier) + Line.Type = LT_ObjCMethodDecl; + else if (Line.First.Type == TT_ObjCDecl) + Line.Type = LT_ObjCDecl; + else if (Line.First.Type == TT_ObjCProperty) + Line.Type = LT_ObjCProperty; - LineType getLineType() { - return CurrentLineType; - } + Line.First.SpaceRequiredBefore = true; + Line.First.MustBreakBefore = Line.First.FormatTok.MustBreakBefore; + Line.First.CanBreakBefore = Line.First.MustBreakBefore; - const AnnotatedToken &getRootToken() { - return RootToken; + if (!Line.First.Children.empty()) + calculateExtraInformation(Line.First.Children[0]); } private: @@ -1079,7 +1093,7 @@ private: return false; if (Right.is(tok::less) && (Left.is(tok::kw_template) || - (CurrentLineType == LT_ObjCDecl && Style.ObjCSpaceBeforeProtocolList))) + (Line.Type == LT_ObjCDecl && Style.ObjCSpaceBeforeProtocolList))) return true; if (Left.is(tok::arrow) || Right.is(tok::arrow)) return false; @@ -1119,7 +1133,7 @@ private: if (Left.is(tok::l_paren)) return false; if (Right.is(tok::l_paren)) { - return CurrentLineType == LT_ObjCDecl || Left.is(tok::kw_if) || + return Line.Type == LT_ObjCDecl || Left.is(tok::kw_if) || Left.is(tok::kw_for) || Left.is(tok::kw_while) || Left.is(tok::kw_switch) || Left.is(tok::kw_return) || Left.is(tok::kw_catch) || Left.is(tok::kw_new) || @@ -1134,7 +1148,7 @@ private: } bool spaceRequiredBefore(const AnnotatedToken &Tok) { - if (CurrentLineType == LT_ObjCMethodDecl) { + if (Line.Type == LT_ObjCMethodDecl) { if (Tok.is(tok::identifier) && !Tok.Children.empty() && Tok.Children[0].is(tok::colon) && Tok.Parent->is(tok::identifier)) return true; @@ -1151,7 +1165,7 @@ private: // Don't space between ':' and '(' return false; } - if (CurrentLineType == LT_ObjCProperty && + if (Line.Type == LT_ObjCProperty && (Tok.is(tok::equal) || Tok.Parent->is(tok::equal))) return false; @@ -1167,7 +1181,7 @@ private: if (Tok.Parent->Type == TT_OverloadedOperator) return false; if (Tok.is(tok::colon)) - return RootToken.isNot(tok::kw_case) && !Tok.Children.empty() && + return Line.First.isNot(tok::kw_case) && !Tok.Children.empty() && Tok.Type != TT_ObjCMethodExpr; if (Tok.Parent->Type == TT_UnaryOperator || Tok.Parent->Type == TT_CastRParen) @@ -1185,7 +1199,7 @@ private: return true; if (Tok.Parent->Type == TT_TemplateCloser && Tok.is(tok::l_paren)) return false; - if (Tok.is(tok::less) && RootToken.is(tok::hash)) + if (Tok.is(tok::less) && Line.First.is(tok::hash)) return true; if (Tok.Type == TT_TrailingUnaryOperator) return false; @@ -1194,7 +1208,7 @@ private: bool canBreakBefore(const AnnotatedToken &Right) { const AnnotatedToken &Left = *Right.Parent; - if (CurrentLineType == LT_ObjCMethodDecl) { + if (Line.Type == LT_ObjCMethodDecl) { if (Right.is(tok::identifier) && !Right.Children.empty() && Right.Children[0].is(tok::colon) && Left.is(tok::identifier)) return true; @@ -1221,7 +1235,7 @@ private: if (Left.Type == TT_PointerOrReference || Left.Type == TT_TemplateCloser || Left.Type == TT_UnaryOperator || Right.Type == TT_ConditionalExpr) return false; - if (Left.is(tok::equal) && CurrentLineType == LT_VirtualFunctionDecl) + if (Left.is(tok::equal) && Line.Type == LT_VirtualFunctionDecl) return false; if (Right.is(tok::comment)) @@ -1246,8 +1260,7 @@ private: FormatStyle Style; SourceManager &SourceMgr; Lexer &Lex; - LineType CurrentLineType; - AnnotatedToken RootToken; + AnnotatedLine &Line; }; class LexerBasedFormatTokenSource : public FormatTokenSource { @@ -1347,35 +1360,32 @@ public: UnwrappedLineParser Parser(Diag, Style, Tokens, *this); StructuralError = Parser.parse(); unsigned PreviousEndOfLineColumn = 0; - for (std::vector::iterator I = UnwrappedLines.begin(), - E = UnwrappedLines.end(); + for (unsigned i = 0, e = AnnotatedLines.size(); i != e; ++i) { + TokenAnnotator Annotator(Style, SourceMgr, Lex, AnnotatedLines[i]); + Annotator.annotate(); + } + for (std::vector::iterator I = AnnotatedLines.begin(), + E = AnnotatedLines.end(); I != E; ++I) { - const UnwrappedLine &TheLine = *I; - if (touchesRanges(TheLine)) { - OwningPtr AnnotatedLine( - new TokenAnnotator(TheLine, Style, SourceMgr, Lex)); - if (!AnnotatedLine->annotate()) - break; - unsigned Indent = formatFirstToken(AnnotatedLine->getRootToken(), - TheLine.Level, TheLine.InPPDirective, + const AnnotatedLine &TheLine = *I; + if (touchesRanges(TheLine) && TheLine.Type != LT_Invalid) { + unsigned Indent = formatFirstToken(TheLine.First, TheLine.Level, + TheLine.InPPDirective, PreviousEndOfLineColumn); - - UnwrappedLine Line(TheLine); - bool FitsOnALine = tryFitMultipleLinesInOne(Indent, Line, AnnotatedLine, - I, E); - UnwrappedLineFormatter Formatter( - Style, SourceMgr, Line, Indent, FitsOnALine, - AnnotatedLine->getRootToken(), Replaces, StructuralError); + bool FitsOnALine = tryFitMultipleLinesInOne(Indent, I, E); + UnwrappedLineFormatter Formatter(Style, SourceMgr, TheLine, Indent, + FitsOnALine, TheLine.First, Replaces, + StructuralError); PreviousEndOfLineColumn = Formatter.format(); } else { // If we did not reformat this unwrapped line, the column at the end of // the last token is unchanged - thus, we can calculate the end of the // last token, and return the result. - const FormatToken *Last = getLastInLine(TheLine); PreviousEndOfLineColumn = - SourceMgr.getSpellingColumnNumber(Last->Tok.getLocation()) + - Lex.MeasureTokenLength(Last->Tok.getLocation(), SourceMgr, - Lex.getLangOpts()) - + SourceMgr.getSpellingColumnNumber( + TheLine.Last->FormatTok.Tok.getLocation()) + + Lex.MeasureTokenLength(TheLine.Last->FormatTok.Tok.getLocation(), + SourceMgr, Lex.getLangOpts()) - 1; } } @@ -1389,81 +1399,85 @@ private: /// if possible; note that \c I will be incremented when lines are merged. /// /// Returns whether the resulting \c Line can fit in a single line. - bool tryFitMultipleLinesInOne(unsigned Indent, UnwrappedLine &Line, - OwningPtr &AnnotatedLine, - std::vector::iterator &I, - std::vector::iterator E) { + bool tryFitMultipleLinesInOne(unsigned Indent, + std::vector::iterator &I, + std::vector::iterator E) { unsigned Limit = Style.ColumnLimit - (I->InPPDirective ? 1 : 0) - Indent; // Check whether the UnwrappedLine can be put onto a single line. If // so, this is bound to be the optimal solution (by definition) and we // don't need to analyze the entire solution space. - bool FitsOnALine = fitsIntoLimit(AnnotatedLine->getRootToken(), Limit); - if (!FitsOnALine || I + 1 == E || I + 2 == E) - return FitsOnALine; + unsigned LengthLine1 = 0; + if (!fitsIntoLimit(I->First, Limit, &LengthLine1)) + return false; - // Try to merge the next two lines if possible. - UnwrappedLine Combined(Line); + // Check that we still have three lines and they fit into the limit. + if (I + 1 == E || I + 2 == E) + return true; + unsigned LengthLine2 = 0; + unsigned LengthLine3 = 0; + if (!fitsIntoLimit((I + 1)->First, Limit, &LengthLine2) || + !fitsIntoLimit((I + 2)->First, Limit, &LengthLine3)) + return true; + if (LengthLine1 + LengthLine2 + LengthLine3 + 2 > Limit) // Two spaces. + return true; // First, check that the current line allows merging. This is the case if // we're not in a control flow statement and the last token is an opening // brace. - FormatToken *Last = &Combined.RootToken; + AnnotatedLine& Line = *I; bool AllowedTokens = - Last->Tok.isNot(tok::kw_if) && Last->Tok.isNot(tok::kw_while) && - Last->Tok.isNot(tok::kw_do) && Last->Tok.isNot(tok::r_brace) && - Last->Tok.isNot(tok::kw_else) && Last->Tok.isNot(tok::kw_try) && - Last->Tok.isNot(tok::kw_catch) && Last->Tok.isNot(tok::kw_for) && + Line.First.isNot(tok::kw_if) && Line.First.isNot(tok::kw_while) && + Line.First.isNot(tok::kw_do) && Line.First.isNot(tok::r_brace) && + Line.First.isNot(tok::kw_else) && Line.First.isNot(tok::kw_try) && + Line.First.isNot(tok::kw_catch) && Line.First.isNot(tok::kw_for) && // This gets rid of all ObjC @ keywords and methods. - Last->Tok.isNot(tok::at) && Last->Tok.isNot(tok::minus) && - Last->Tok.isNot(tok::plus); - while (!Last->Children.empty()) - Last = &Last->Children.back(); - if (!Last->Tok.is(tok::l_brace)) - return FitsOnALine; + Line.First.isNot(tok::at) && Line.First.isNot(tok::minus) && + Line.First.isNot(tok::plus); + if (Line.Last->isNot(tok::l_brace) || !AllowedTokens) + return true; // Second, check that the next line does not contain any braces - if it // does, readability declines when putting it into a single line. - const FormatToken *Next = &(I + 1)->RootToken; - while (Next) { - AllowedTokens = AllowedTokens && !Next->Tok.is(tok::l_brace) && - !Next->Tok.is(tok::r_brace); - Last->Children.push_back(*Next); - Last = &Last->Children[0]; - Last->Children.clear(); - Next = Next->Children.empty() ? NULL : &Next->Children.back(); - } + const AnnotatedToken *Tok = &(I + 1)->First; + if ((I + 1)->Last->Type == TT_LineComment || Tok->MustBreakBefore) + return true; + do { + if (Tok->is(tok::l_brace) || Tok->is(tok::r_brace)) + return true; + Tok = Tok->Children.empty() ? NULL : &Tok->Children.back(); + } while (Tok != NULL); // Last, check that the third line contains a single closing brace. - Next = &(I + 2)->RootToken; - AllowedTokens = AllowedTokens && Next->Tok.is(tok::r_brace); - if (!Next->Children.empty() || !AllowedTokens) - return FitsOnALine; - Last->Children.push_back(*Next); - - OwningPtr CombinedAnnotator( - new TokenAnnotator(Combined, Style, SourceMgr, Lex)); - if (CombinedAnnotator->annotate() && - fitsIntoLimit(CombinedAnnotator->getRootToken(), Limit)) { - // If the merged line fits, we use that instead and skip the next two - // lines. - AnnotatedLine.reset(CombinedAnnotator.take()); - Line = Combined; - I += 2; + Tok = &(I + 2)->First; + if (!Tok->Children.empty() || Tok->isNot(tok::r_brace) || + Tok->MustBreakBefore) + return true; + + // If the merged line fits, we use that instead and skip the next two lines. + Line.Last->Children.push_back((I + 1)->First); + while (!Line.Last->Children.empty()) { + Line.Last->Children[0].Parent = Line.Last; + Line.Last = &Line.Last->Children[0]; } - return FitsOnALine; + + join(Line, *(I + 1)); + join(Line, *(I + 2)); + I += 2; + return true; } - const FormatToken *getLastInLine(const UnwrappedLine &TheLine) { - const FormatToken *Last = &TheLine.RootToken; - while (!Last->Children.empty()) - Last = &Last->Children.back(); - return Last; + void join(AnnotatedLine &A, const AnnotatedLine &B) { + A.Last->Children.push_back(B.First); + while (!A.Last->Children.empty()) { + A.Last->Children[0].Parent = A.Last; + A.Last = &A.Last->Children[0]; + } } - bool touchesRanges(const UnwrappedLine &TheLine) { - const FormatToken *First = &TheLine.RootToken; - const FormatToken *Last = getLastInLine(TheLine); + bool touchesRanges(const AnnotatedLine &TheLine) { + const FormatToken *First = &TheLine.First.FormatTok; + const FormatToken *Last = &TheLine.Last->FormatTok; CharSourceRange LineRange = CharSourceRange::getTokenRange( First->Tok.getLocation(), Last->Tok.getLocation()); @@ -1478,7 +1492,8 @@ private: } virtual void consumeUnwrappedLine(const UnwrappedLine &TheLine) { - UnwrappedLines.push_back(TheLine); + AnnotatedLines.push_back( + AnnotatedLine(TheLine.RootToken, TheLine.Level, TheLine.InPPDirective)); } /// \brief Add a new line and the required indent before the first Token @@ -1526,7 +1541,7 @@ private: SourceManager &SourceMgr; tooling::Replacements Replaces; std::vector Ranges; - std::vector UnwrappedLines; + std::vector AnnotatedLines; bool StructuralError; };