From 052685c170fcc4fa057c68a44425e6d9b945a167 Mon Sep 17 00:00:00 2001 From: Alexander Kornienko Date: Tue, 19 Mar 2013 17:41:36 +0000 Subject: [PATCH] Split long lines in multi-line comments. Summary: This is implementation for /* */ comments only. Reviewers: djasper, klimek Reviewed By: djasper CC: cfe-commits Differential Revision: http://llvm-reviews.chandlerc.com/D547 git-svn-id: https://llvm.org/svn/llvm-project/cfe/trunk@177415 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Format/Format.cpp | 204 +++++++++++++++++++++++--------- unittests/Format/FormatTest.cpp | 128 ++++++++++++++++++++ 2 files changed, 275 insertions(+), 57 deletions(-) diff --git a/lib/Format/Format.cpp b/lib/Format/Format.cpp index a17823f4ce..dd8f6cde03 100644 --- a/lib/Format/Format.cpp +++ b/lib/Format/Format.cpp @@ -103,13 +103,13 @@ static unsigned getLengthToMatchingParen(const AnnotatedToken &Tok) { /// trailing line comments. class WhitespaceManager { public: - WhitespaceManager(SourceManager &SourceMgr) : SourceMgr(SourceMgr) {} + WhitespaceManager(SourceManager &SourceMgr, const FormatStyle &Style) + : SourceMgr(SourceMgr), Style(Style) {} /// \brief Replaces the whitespace in front of \p Tok. Only call once for /// each \c AnnotatedToken. void replaceWhitespace(const AnnotatedToken &Tok, unsigned NewLines, - unsigned Spaces, unsigned WhitespaceStartColumn, - const FormatStyle &Style) { + unsigned Spaces, unsigned WhitespaceStartColumn) { // 2+ newlines mean an empty line separating logic scopes. if (NewLines >= 2) alignComments(); @@ -146,7 +146,7 @@ public: alignComments(); if (Tok.Type == TT_BlockComment) - indentBlockComment(Tok.FormatTok, Spaces); + indentBlockComment(Tok, Spaces, false); storeReplacement(Tok.FormatTok, getNewLineText(NewLines, Spaces)); } @@ -157,11 +157,12 @@ public: /// This function and \c replaceWhitespace have the same behavior if /// \c Newlines == 0. void replacePPWhitespace(const AnnotatedToken &Tok, unsigned NewLines, - unsigned Spaces, unsigned WhitespaceStartColumn, - const FormatStyle &Style) { - storeReplacement( - Tok.FormatTok, - getNewLineText(NewLines, Spaces, WhitespaceStartColumn, Style)); + unsigned Spaces, unsigned WhitespaceStartColumn) { + if (Tok.Type == TT_BlockComment) + indentBlockComment(Tok, Spaces, true); + + storeReplacement(Tok.FormatTok, + getNewLineText(NewLines, Spaces, WhitespaceStartColumn)); } /// \brief Inserts a line break into the middle of a token. @@ -171,19 +172,19 @@ public: /// /// \p InPPDirective, \p Spaces, \p WhitespaceStartColumn and \p Style are /// used to generate the correct line break. - void breakToken(const AnnotatedToken &Tok, unsigned Offset, StringRef Prefix, - StringRef Postfix, bool InPPDirective, unsigned Spaces, - unsigned WhitespaceStartColumn, const FormatStyle &Style) { + void breakToken(const FormatToken &Tok, unsigned Offset, + unsigned ReplaceChars, StringRef Prefix, StringRef Postfix, + bool InPPDirective, unsigned Spaces, + unsigned WhitespaceStartColumn) { std::string NewLineText; if (!InPPDirective) NewLineText = getNewLineText(1, Spaces); else - NewLineText = getNewLineText(1, Spaces, WhitespaceStartColumn, Style); + NewLineText = getNewLineText(1, Spaces, WhitespaceStartColumn); std::string ReplacementText = (Prefix + NewLineText + Postfix).str(); - SourceLocation InsertAt = Tok.FormatTok.WhiteSpaceStart - .getLocWithOffset(Tok.FormatTok.WhiteSpaceLength + Offset); - Replaces.insert( - tooling::Replacement(SourceMgr, InsertAt, 0, ReplacementText)); + SourceLocation Location = Tok.Tok.getLocation().getLocWithOffset(Offset); + Replaces.insert(tooling::Replacement(SourceMgr, Location, ReplaceChars, + ReplacementText)); } /// \brief Returns all the \c Replacements created during formatting. @@ -193,33 +194,122 @@ public: } private: - void indentBlockComment(const FormatToken &Tok, int Indent) { - SourceLocation TokenLoc = Tok.Tok.getLocation(); - int IndentDelta = Indent - SourceMgr.getSpellingColumnNumber(TokenLoc) + 1; - const char *Start = SourceMgr.getCharacterData(TokenLoc); - const char *Current = Start; - const char *TokEnd = Current + Tok.TokenLength; - llvm::SmallVector LineStarts; - while (Current < TokEnd) { - if (*Current == '\n') { - ++Current; - LineStarts.push_back(TokenLoc.getLocWithOffset(Current - Start)); - // If we need to outdent the line, check that it's indented enough. - for (int i = 0; i < -IndentDelta; ++i, ++Current) - if (Current >= TokEnd || *Current != ' ') - return; - } else { - ++Current; + /// \brief Finds a common prefix of lines of a block comment to properly + /// indent (and possibly decorate with '*'s) added lines. + /// + /// The first line is ignored (it's special and starts with /*). + /// When there are less than three lines, we don't have enough information, so + /// better use no prefix. + static StringRef findCommentLinesPrefix(ArrayRef Lines, + const char *PrefixChars = " *") { + if (Lines.size() < 3) + return ""; + StringRef Prefix(Lines[1].data(), Lines[1].find_first_not_of(PrefixChars)); + for (size_t i = 2; i < Lines.size(); ++i) { + for (size_t j = 0; j < Prefix.size() && j < Lines[i].size(); ++j) { + if (Prefix[j] != Lines[i][j]) { + Prefix = Prefix.substr(0, j); + break; + } } } + return Prefix; + } + + void splitLineInComment(const FormatToken &Tok, StringRef Line, + size_t StartColumn, StringRef LinePrefix, + bool InPPDirective, bool CommentHasMoreLines, + const char *WhiteSpaceChars = " ") { + size_t ColumnLimit = + Style.ColumnLimit - LinePrefix.size() - (InPPDirective ? 2 : 0); + + if (Line.size() <= ColumnLimit) + return; + + const char *TokenStart = SourceMgr.getCharacterData(Tok.Tok.getLocation()); + while (Line.rtrim().size() > ColumnLimit) { + // Try to break at the last whitespace before the column limit. + size_t SpacePos = + Line.find_last_of(WhiteSpaceChars, ColumnLimit + 1); + if (SpacePos == StringRef::npos) { + // Try to find any whitespace in the line. + SpacePos = Line.find_first_of(WhiteSpaceChars); + if (SpacePos == StringRef::npos) // No whitespace found, give up. + break; + } + + StringRef NextCut = Line.substr(0, SpacePos).rtrim(); + StringRef RemainingLine = Line.substr(SpacePos).ltrim(); + if (RemainingLine.empty()) + break; + Line = RemainingLine; + + size_t ReplaceChars = Line.begin() - NextCut.end(); + breakToken(Tok, NextCut.end() - TokenStart, ReplaceChars, "", LinePrefix, + InPPDirective, 0, + NextCut.size() + LinePrefix.size() + StartColumn); + StartColumn = 0; + } + + StringRef TrimmedLine = Line.rtrim(); + if (TrimmedLine != Line || (InPPDirective && CommentHasMoreLines)) { + // Remove trailing whitespace/insert backslash. + breakToken(Tok, TrimmedLine.end() - TokenStart, + Line.size() - TrimmedLine.size() + 1, "", "", InPPDirective, 0, + TrimmedLine.size() + LinePrefix.size()); + } + } + + void indentBlockComment(const AnnotatedToken &Tok, int Indent, + bool InPPDirective) { + const SourceLocation TokenLoc = Tok.FormatTok.Tok.getLocation(); + const int CurrentIndent = SourceMgr.getSpellingColumnNumber(TokenLoc) - 1; + const int IndentDelta = Indent - CurrentIndent; + const StringRef Text(SourceMgr.getCharacterData(TokenLoc), + Tok.FormatTok.TokenLength); + assert(Text.startswith("/*") && Text.endswith("*/")); + + SmallVector Lines; + Text.split(Lines, "\n"); + + if (IndentDelta > 0) { + std::string WhiteSpace(IndentDelta, ' '); + for (size_t i = 1; i < Lines.size(); ++i) { + Replaces.insert(tooling::Replacement( + SourceMgr, TokenLoc.getLocWithOffset(Lines[i].data() - Text.data()), + 0, WhiteSpace)); + } + } else if (IndentDelta < 0) { + std::string WhiteSpace(-IndentDelta, ' '); + // Check that the line is indented enough. + for (size_t i = 1; i < Lines.size(); ++i) { + if (!Lines[i].startswith(WhiteSpace)) + return; + } + for (size_t i = 1; i < Lines.size(); ++i) { + Replaces.insert(tooling::Replacement( + SourceMgr, TokenLoc.getLocWithOffset(Lines[i].data() - Text.data()), + -IndentDelta, "")); + } + } + + // Split long lines in comments. + const StringRef CurrentPrefix = findCommentLinesPrefix(Lines); + size_t PrefixSize = CurrentPrefix.size(); + std::string NewPrefix = + (IndentDelta < 0) ? CurrentPrefix.substr(-IndentDelta).str() + : std::string(IndentDelta, ' ') + CurrentPrefix.str(); + + if (CurrentPrefix.endswith("*")) { + NewPrefix += " "; + ++PrefixSize; + } - for (size_t i = 0; i < LineStarts.size(); ++i) { - if (IndentDelta > 0) - Replaces.insert(tooling::Replacement(SourceMgr, LineStarts[i], 0, - std::string(IndentDelta, ' '))); - else if (IndentDelta < 0) - Replaces.insert( - tooling::Replacement(SourceMgr, LineStarts[i], -IndentDelta, "")); + for (size_t i = 0; i < Lines.size(); ++i) { + StringRef Line = (i == 0) ? Lines[i] : Lines[i].substr(PrefixSize); + size_t StartColumn = (i == 0) ? CurrentIndent : 0; + splitLineInComment(Tok.FormatTok, Line, StartColumn, NewPrefix, + InPPDirective, i != Lines.size() - 1); } } @@ -227,9 +317,8 @@ private: return std::string(NewLines, '\n') + std::string(Spaces, ' '); } - std::string - getNewLineText(unsigned NewLines, unsigned Spaces, - unsigned WhitespaceStartColumn, const FormatStyle &Style) { + std::string getNewLineText(unsigned NewLines, unsigned Spaces, + unsigned WhitespaceStartColumn) { std::string NewLineText; if (NewLines > 0) { unsigned Offset = @@ -298,6 +387,7 @@ private: SourceManager &SourceMgr; tooling::Replacements Replaces; + const FormatStyle &Style; }; class UnwrappedLineFormatter { @@ -576,10 +666,10 @@ private: Style.MaxEmptyLinesToKeep + 1)); if (!Line.InPPDirective) Whitespaces.replaceWhitespace(Current, NewLines, State.Column, - WhitespaceStartColumn, Style); + WhitespaceStartColumn); else Whitespaces.replacePPWhitespace(Current, NewLines, State.Column, - WhitespaceStartColumn, Style); + WhitespaceStartColumn); } State.Stack.back().LastSpace = State.Column; @@ -614,7 +704,7 @@ private: unsigned Spaces = State.NextToken->SpacesRequiredBefore; if (!DryRun) - Whitespaces.replaceWhitespace(Current, 0, Spaces, State.Column, Style); + Whitespaces.replaceWhitespace(Current, 0, Spaces, State.Column); if (Current.Type == TT_ObjCSelectorName && State.Stack.back().ColonPos == 0) { @@ -756,7 +846,8 @@ private: if (Current.isNot(tok::string_literal)) return 0; // Only break up default narrow strings. - if (StringRef(Current.FormatTok.Tok.getLiteralData()).find('"') != 0) + const char *LiteralData = Current.FormatTok.Tok.getLiteralData(); + if (!LiteralData || *LiteralData != '"') return 0; unsigned Penalty = 0; @@ -765,8 +856,7 @@ private: unsigned StartColumn = State.Column - Current.FormatTok.TokenLength; unsigned OffsetFromStart = 0; while (StartColumn + TailLength > getColumnLimit()) { - StringRef Text = StringRef( - Current.FormatTok.Tok.getLiteralData() + TailOffset, TailLength); + StringRef Text = StringRef(LiteralData + TailOffset, TailLength); if (StartColumn + OffsetFromStart + 1 > getColumnLimit()) break; StringRef::size_type SplitPoint = getSplitPoint( @@ -780,9 +870,9 @@ private: StartColumn + OffsetFromStart + SplitPoint + 2; State.Stack.back().LastSpace = StartColumn; if (!DryRun) { - Whitespaces.breakToken(Current, TailOffset + SplitPoint + 1, "\"", "\"", - Line.InPPDirective, StartColumn, - WhitespaceStartColumn, Style); + Whitespaces.breakToken(Current.FormatTok, TailOffset + SplitPoint + 1, + 0, "\"", "\"", Line.InPPDirective, StartColumn, + WhitespaceStartColumn); } TailOffset += SplitPoint + 1; TailLength -= SplitPoint + 1; @@ -1157,7 +1247,7 @@ public: SourceManager &SourceMgr, const std::vector &Ranges) : Diag(Diag), Style(Style), Lex(Lex), SourceMgr(SourceMgr), - Whitespaces(SourceMgr), Ranges(Ranges) {} + Whitespaces(SourceMgr, Style), Ranges(Ranges) {} virtual ~Formatter() {} @@ -1199,7 +1289,7 @@ public: if (PreviousLineWasTouched) { unsigned NewLines = std::min(FirstTok.NewlinesBefore, 1u); Whitespaces.replaceWhitespace(TheLine.First, NewLines, /*Indent*/ 0, - /*WhitespaceStartColumn*/ 0, Style); + /*WhitespaceStartColumn*/ 0); } } else if (TheLine.Type != LT_Invalid && (WasMoved || touchesLine(TheLine))) { @@ -1503,10 +1593,10 @@ private: Newlines = 1; if (!InPPDirective || Tok.HasUnescapedNewline) { - Whitespaces.replaceWhitespace(RootToken, Newlines, Indent, 0, Style); + Whitespaces.replaceWhitespace(RootToken, Newlines, Indent, 0); } else { Whitespaces.replacePPWhitespace(RootToken, Newlines, Indent, - PreviousEndOfLineColumn, Style); + PreviousEndOfLineColumn); } } diff --git a/unittests/Format/FormatTest.cpp b/unittests/Format/FormatTest.cpp index e6ae81da5c..a3afbee916 100644 --- a/unittests/Format/FormatTest.cpp +++ b/unittests/Format/FormatTest.cpp @@ -651,6 +651,134 @@ TEST_F(FormatTest, AlignsMultiLineComments) { " */")); } +TEST_F(FormatTest, SplitsLongLinesInComments) { + EXPECT_EQ("/* This is a long\n" + "comment that doesn't\n" + "fit on one line. */", + format("/* " + "This is a long " + "comment that doesn't " + "fit on one line. */", + getLLVMStyleWithColumns(20))); + EXPECT_EQ("/*\n" + "This is a long\n" + "comment that doesn't\n" + "fit on one line.\n" + "*/", + format("/*\n" + "This is a long " + "comment that doesn't " + "fit on one line. \n" + "*/", getLLVMStyleWithColumns(20))); + EXPECT_EQ("/*\n" + " * This is a long\n" + " * comment that\n" + " * doesn't fit on\n" + " * one line.\n" + " */", + format("/*\n" + " * This is a long " + " comment that " + " doesn't fit on " + " one line. \n" + " */", getLLVMStyleWithColumns(20))); + EXPECT_EQ("/*\n" + " * This_is_a_comment_with_words_that_dont_fit_on_one_line\n" + " * so_it_should_be_broken\n" + " * wherever_a_space_occurs\n" + " */", + format("/*\n" + " * This_is_a_comment_with_words_that_dont_fit_on_one_line " + " so_it_should_be_broken " + " wherever_a_space_occurs \n" + " */", + getLLVMStyleWithColumns(20))); + EXPECT_EQ("/*\n" + " * This_comment_can_not_be_broken_into_lines\n" + " */", + format("/*\n" + " * This_comment_can_not_be_broken_into_lines\n" + " */", + getLLVMStyleWithColumns(20))); + EXPECT_EQ("{\n" + " /*\n" + " This is another\n" + " long comment that\n" + " doesn't fit on one\n" + " line 1234567890\n" + " */\n" + "}", + format("{\n" + "/*\n" + "This is another " + " long comment that " + " doesn't fit on one" + " line 1234567890\n" + "*/\n" + "}", getLLVMStyleWithColumns(20))); + EXPECT_EQ("{\n" + " /*\n" + " * This i s\n" + " * another comment\n" + " * t hat doesn' t\n" + " * fit on one l i\n" + " * n e\n" + " */\n" + "}", + format("{\n" + "/*\n" + " * This i s" + " another comment" + " t hat doesn' t" + " fit on one l i" + " n e\n" + " */\n" + "}", getLLVMStyleWithColumns(20))); + EXPECT_EQ("/*\n" + " * This is a long\n" + " * comment that\n" + " * doesn't fit on\n" + " * one line\n" + " */", + format(" /*\n" + " * This is a long comment that doesn't fit on one line\n" + " */", getLLVMStyleWithColumns(20))); +} + +TEST_F(FormatTest, SplitsLongLinesInCommentsInPreprocessor) { + EXPECT_EQ("#define X \\\n" + // FIXME: Backslash should be added here. + " /*\n" + " Macro comment \\\n" + " with a long \\\n" + " line \\\n" + // FIXME: We should look at the length of the last line of the token + // instead of the full token's length. + //" */ \\\n" + " */\\\n" + " A + B", + format("#define X \\\n" + " /*\n" + " Macro comment with a long line\n" + " */ \\\n" + " A + B", + getLLVMStyleWithColumns(20))); + EXPECT_EQ("#define X \\\n" + " /* Macro comment \\\n" + // FIXME: Indent comment continuations when the comment is a first + // token in a line. + "with a long line \\\n" + // FIXME: We should look at the length of the last line of the token + // instead of the full token's length. + //"*/ \\\n" + "*/\\\n" + " A + B", + format("#define X \\\n" + " /* Macro comment with a long line */ \\\n" + " A + B", + getLLVMStyleWithColumns(20))); +} + TEST_F(FormatTest, CommentsInStaticInitializers) { EXPECT_EQ( "static SomeType type = { aaaaaaaaaaaaaaaaaaaa, /* comment */\n" -- 2.40.0