[clang-format] Implement comment reflowing.

author Krasimir Georgiev <krasimir@google.com>

Wed, 25 Jan 2017 13:58:58 +0000 (13:58 +0000)

committer Krasimir Georgiev <krasimir@google.com>

Wed, 25 Jan 2017 13:58:58 +0000 (13:58 +0000)
author Krasimir Georgiev <krasimir@google.com>
Wed, 25 Jan 2017 13:58:58 +0000 (13:58 +0000)
committer Krasimir Georgiev <krasimir@google.com>
Wed, 25 Jan 2017 13:58:58 +0000 (13:58 +0000)
diff --git a/lib/Format/BreakableToken.cpp b/lib/Format/BreakableToken.cpp

index 6363f895f95b94fa1c3f48632b68422f8aff9293..569ff1c648345796dd0d0ab1986bac22aed96a31 100644 (file)
--- a/lib/Format/BreakableToken.cpp
+++ b/lib/Format/BreakableToken.cpp
@@ -14,7 +14,7 @@
  //===----------------------------------------------------------------------===//
  
  #include "BreakableToken.h"
-#include "Comments.h"
+#include "ContinuationIndenter.h"
  #include "clang/Basic/CharInfo.h"
  #include "clang/Format/Format.h"
  #include "llvm/ADT/STLExtras.h"
@@ -40,6 +40,21 @@ static bool IsBlank(char C) {
    }
  }
  
+static StringRef getLineCommentIndentPrefix(StringRef Comment) {
+  static const char *const KnownPrefixes[] = {"///", "//", "//!"};
+  StringRef LongestPrefix;
+  for (StringRef KnownPrefix : KnownPrefixes) {
+    if (Comment.startswith(KnownPrefix)) {
+      size_t PrefixLength = KnownPrefix.size();
+      while (PrefixLength < Comment.size() && Comment[PrefixLength] == ' ')
+        ++PrefixLength;
+      if (PrefixLength > LongestPrefix.size())
+        LongestPrefix = Comment.substr(0, PrefixLength);
+    }
+  }
+  return LongestPrefix;
+}
+
  static BreakableToken::Split getCommentSplit(StringRef Text,
                                               unsigned ContentStartColumn,
                                               unsigned ColumnLimit,
@@ -132,12 +147,35 @@ getStringSplit(StringRef Text, unsigned UsedColumns, unsigned ColumnLimit,
    return BreakableToken::Split(StringRef::npos, 0);
  }
  
+bool switchesFormatting(const FormatToken &Token) {
+  assert((Token.is(TT_BlockComment) || Token.is(TT_LineComment)) &&
+         "formatting regions are switched by comment tokens");
+  StringRef Content = Token.TokenText.substr(2).ltrim();
+  return Content.startswith("clang-format on") ||
+         Content.startswith("clang-format off");
+}
+
+unsigned
+BreakableToken::getLineLengthAfterCompression(unsigned RemainingTokenColumns,
+                                              Split Split) const {
+  // Example: consider the content
+  // lala  lala
+  // - RemainingTokenColumns is the original number of columns, 10;
+  // - Split is (4, 2), denoting the two spaces between the two words;
+  //
+  // We compute the number of columns when the split is compressed into a single
+  // space, like:
+  // lala lala
+  return RemainingTokenColumns + 1 - Split.second;
+}
+
  unsigned BreakableSingleLineToken::getLineCount() const { return 1; }
  
  unsigned BreakableSingleLineToken::getLineLengthAfterSplit(
-    unsigned LineIndex, unsigned Offset, StringRef::size_type Length) const {
+    unsigned LineIndex, unsigned TailOffset,
+    StringRef::size_type Length) const {
    return StartColumn + Prefix.size() + Postfix.size() +
-         encoding::columnWidthWithTabs(Line.substr(Offset, Length),
+         encoding::columnWidthWithTabs(Line.substr(TailOffset, Length),
                                         StartColumn + Prefix.size(),
                                         Style.TabWidth, Encoding);
  }
@@ -183,71 +221,123 @@ void BreakableStringLiteral::insertBreak(unsigned LineIndex,
        Prefix, InPPDirective, 1, IndentLevel, LeadingSpaces);
  }
  
-BreakableLineComment::BreakableLineComment(
-    const FormatToken &Token, unsigned IndentLevel, unsigned StartColumn,
-    bool InPPDirective, encoding::Encoding Encoding, const FormatStyle &Style)
-    : BreakableSingleLineToken(Token, IndentLevel, StartColumn,
-                               getLineCommentIndentPrefix(Token.TokenText), "",
-                               InPPDirective, Encoding, Style) {
-  OriginalPrefix = Prefix;
-  if (Token.TokenText.size() > Prefix.size() &&
-      isAlphanumeric(Token.TokenText[Prefix.size()])) {
-    if (Prefix == "//")
-      Prefix = "// ";
-    else if (Prefix == "///")
-      Prefix = "/// ";
-    else if (Prefix == "//!")
-      Prefix = "//! ";
-  }
-}
-
-BreakableToken::Split
-BreakableLineComment::getSplit(unsigned LineIndex, unsigned TailOffset,
-                               unsigned ColumnLimit) const {
-  return getCommentSplit(Line.substr(TailOffset), StartColumn + Prefix.size(),
+BreakableComment::BreakableComment(const FormatToken &Token,
+                                   unsigned IndentLevel, unsigned StartColumn,
+                                   unsigned OriginalStartColumn,
+                                   bool FirstInLine, bool InPPDirective,
+                                   encoding::Encoding Encoding,
+                                   const FormatStyle &Style)
+    : BreakableToken(Token, IndentLevel, InPPDirective, Encoding, Style),
+      StartColumn(StartColumn), OriginalStartColumn(OriginalStartColumn),
+      FirstInLine(FirstInLine) {}
+
+unsigned BreakableComment::getLineCount() const { return Lines.size(); }
+
+BreakableToken::Split BreakableComment::getSplit(unsigned LineIndex,
+                                                 unsigned TailOffset,
+                                                 unsigned ColumnLimit) const {
+  return getCommentSplit(Content[LineIndex].substr(TailOffset),
+                         getContentStartColumn(LineIndex, TailOffset),
                           ColumnLimit, Style.TabWidth, Encoding);
  }
  
-void BreakableLineComment::insertBreak(unsigned LineIndex, unsigned TailOffset,
-                                       Split Split,
-                                       WhitespaceManager &Whitespaces) {
+void BreakableComment::compressWhitespace(unsigned LineIndex,
+                                          unsigned TailOffset, Split Split,
+                                          WhitespaceManager &Whitespaces) {
+  StringRef Text = Content[LineIndex].substr(TailOffset);
+  // Text is relative to the content line, but Whitespaces operates relative to
+  // the start of the corresponding token, so compute the start of the Split
+  // that needs to be compressed into a single space relative to the start of
+  // its token.
+  unsigned BreakOffsetInToken =
+      Text.data() - tokenAt(LineIndex).TokenText.data() + Split.first;
+  unsigned CharsToRemove = Split.second;
    Whitespaces.replaceWhitespaceInToken(
-      Tok, OriginalPrefix.size() + TailOffset + Split.first, Split.second,
-      Postfix, Prefix, InPPDirective, /*Newlines=*/1, IndentLevel, StartColumn);
+      tokenAt(LineIndex), BreakOffsetInToken, CharsToRemove, "", "",
+      /*InPPDirective=*/false,
+      /*Newlines=*/0, /*IndentLevel=*/0, /*Spaces=*/1);
  }
  
-void BreakableLineComment::replaceWhitespace(unsigned LineIndex,
-                                             unsigned TailOffset, Split Split,
-                                             WhitespaceManager &Whitespaces) {
-  Whitespaces.replaceWhitespaceInToken(
-      Tok, OriginalPrefix.size() + TailOffset + Split.first, Split.second, "",
-      "", /*InPPDirective=*/false, /*Newlines=*/0, /*IndentLevel=*/0,
-      /*Spaces=*/1);
-}
-
-void BreakableLineComment::replaceWhitespaceBefore(
-    unsigned LineIndex, WhitespaceManager &Whitespaces) {
-  if (OriginalPrefix != Prefix) {
-    Whitespaces.replaceWhitespaceInToken(Tok, OriginalPrefix.size(), 0, "", "",
-                                         /*InPPDirective=*/false,
-                                         /*Newlines=*/0, /*IndentLevel=*/0,
-                                         /*Spaces=*/1);
+BreakableToken::Split
+BreakableComment::getReflowSplit(StringRef Text, StringRef ReflowPrefix,
+                                 unsigned PreviousEndColumn,
+                                 unsigned ColumnLimit) const {
+  unsigned ReflowStartColumn = PreviousEndColumn + ReflowPrefix.size();
+  StringRef TrimmedText = Text.rtrim(Blanks);
+  // This is the width of the resulting line in case the full line of Text gets
+  // reflown up starting at ReflowStartColumn.
+  unsigned FullWidth = ReflowStartColumn + encoding::columnWidthWithTabs(
+                                               TrimmedText, ReflowStartColumn,
+                                               Style.TabWidth, Encoding);
+  // If the full line fits up, we return a reflow split after it,
+  // otherwise we compute the largest piece of text that fits after
+  // ReflowStartColumn.
+  Split ReflowSplit =
+      FullWidth <= ColumnLimit
+          ? Split(TrimmedText.size(), Text.size() - TrimmedText.size())
+          : getCommentSplit(Text, ReflowStartColumn, ColumnLimit,
+                            Style.TabWidth, Encoding);
+
+  // We need to be extra careful here, because while it's OK to keep a long line
+  // if it can't be broken into smaller pieces (like when the first word of a
+  // long line is longer than the column limit), it's not OK to reflow that long
+  // word up. So we recompute the size of the previous line after reflowing and
+  // only return the reflow split if that's under the line limit.
+  if (ReflowSplit.first != StringRef::npos &&
+      // Check if the width of the newly reflown line is under the limit.
+      PreviousEndColumn + ReflowPrefix.size() +
+              encoding::columnWidthWithTabs(Text.substr(0, ReflowSplit.first),
+                                            PreviousEndColumn +
+                                                ReflowPrefix.size(),
+                                            Style.TabWidth, Encoding) <=
+          ColumnLimit) {
+    return ReflowSplit;
    }
+  return Split(StringRef::npos, 0);
+}
+
+const FormatToken &BreakableComment::tokenAt(unsigned LineIndex) const {
+  return Tokens[LineIndex] ? *Tokens[LineIndex] : Tok;
+}
+
+static bool mayReflowContent(StringRef Content) {
+  Content = Content.trim(Blanks);
+  // Simple heuristic for what to reflow: content should contain at least two
+  // characters and either the first or second character must be
+  // non-punctuation.
+  return Content.size() >= 2 && !Content.endswith("\\") &&
+         // Note that this is UTF-8 safe, since if isPunctuation(Content[0]) is
+         // true, then the first code point must be 1 byte long.
+         (!isPunctuation(Content[0]) || !isPunctuation(Content[1]));
+}
+
+bool BreakableComment::mayReflow(unsigned LineIndex) const {
+  return LineIndex > 0 && mayReflowContent(Content[LineIndex]) &&
+         !Tok.Finalized && !switchesFormatting(tokenAt(LineIndex)) &&
+         (!Tok.is(TT_LineComment) ||
+          OriginalPrefix[LineIndex] == OriginalPrefix[LineIndex - 1]);
  }
  
  BreakableBlockComment::BreakableBlockComment(
      const FormatToken &Token, unsigned IndentLevel, unsigned StartColumn,
      unsigned OriginalStartColumn, bool FirstInLine, bool InPPDirective,
      encoding::Encoding Encoding, const FormatStyle &Style)
-    : BreakableToken(Token, IndentLevel, InPPDirective, Encoding, Style) {
-  StringRef TokenText(Token.TokenText);
+    : BreakableComment(Token, IndentLevel, StartColumn, OriginalStartColumn,
+                       FirstInLine, InPPDirective, Encoding, Style) {
+  assert(Tok.is(TT_BlockComment) &&
+         "block comment section must start with a block comment");
+
+  StringRef TokenText(Tok.TokenText);
    assert(TokenText.startswith("/*") && TokenText.endswith("*/"));
    TokenText.substr(2, TokenText.size() - 4).split(Lines, "\n");
  
    int IndentDelta = StartColumn - OriginalStartColumn;
-  LeadingWhitespace.resize(Lines.size());
-  StartOfLineColumn.resize(Lines.size());
-  StartOfLineColumn[0] = StartColumn + 2;
+  Content.resize(Lines.size());
+  Content[0] = Lines[0];
+  ContentColumn.resize(Lines.size());
+  // Account for the initial '/*'.
+  ContentColumn[0] = StartColumn + 2;
+  Tokens.resize(Lines.size());
    for (size_t i = 1; i < Lines.size(); ++i)
      adjustWhitespace(i, IndentDelta);
  
@@ -262,19 +352,20 @@ BreakableBlockComment::BreakableBlockComment(
    }
    for (size_t i = 1, e = Lines.size(); i < e && !Decoration.empty(); ++i) {
      // If the last line is empty, the closing "*/" will have a star.
-    if (i + 1 == e && Lines[i].empty())
+    if (i + 1 == e && Content[i].empty())
        break;
-    if (!Lines[i].empty() && i + 1 != e && Decoration.startswith(Lines[i]))
+    if (!Content[i].empty() && i + 1 != e &&
+        Decoration.startswith(Content[i]))
        continue;
-    while (!Lines[i].startswith(Decoration))
+    while (!Content[i].startswith(Decoration))
        Decoration = Decoration.substr(0, Decoration.size() - 1);
    }
  
    LastLineNeedsDecoration = true;
-  IndentAtLineBreak = StartOfLineColumn[0] + 1;
-  for (size_t i = 1; i < Lines.size(); ++i) {
-    if (Lines[i].empty()) {
-      if (i + 1 == Lines.size()) {
+  IndentAtLineBreak = ContentColumn[0] + 1;
+  for (size_t i = 1, e = Lines.size(); i < e; ++i) {
+    if (Content[i].empty()) {
+      if (i + 1 == e) {
          // Empty last line means that we already have a star as a part of the
          // trailing */. We also need to preserve whitespace, so that */ is
          // correctly indented.
@@ -282,7 +373,7 @@ BreakableBlockComment::BreakableBlockComment(
        } else if (Decoration.empty()) {
          // For all other lines, set the start column to 0 if they're empty, so
          // we do not insert trailing whitespace anywhere.
-        StartOfLineColumn[i] = 0;
+        ContentColumn[i] = 0;
        }
        continue;
      }
@@ -290,21 +381,23 @@ BreakableBlockComment::BreakableBlockComment(
      // The first line already excludes the star.
      // For all other lines, adjust the line to exclude the star and
      // (optionally) the first whitespace.
-    unsigned DecorationSize =
-        Decoration.startswith(Lines[i]) ? Lines[i].size() : Decoration.size();
-    StartOfLineColumn[i] += DecorationSize;
-    Lines[i] = Lines[i].substr(DecorationSize);
-    LeadingWhitespace[i] += DecorationSize;
-    if (!Decoration.startswith(Lines[i]))
+    unsigned DecorationSize = Decoration.startswith(Content[i])
+                                  ? Content[i].size()
+                                  : Decoration.size();
+    ContentColumn[i] += DecorationSize;
+    Content[i] = Content[i].substr(DecorationSize);
+    if (!Decoration.startswith(Content[i]))
        IndentAtLineBreak =
-          std::min<int>(IndentAtLineBreak, std::max(0, StartOfLineColumn[i]));
+          std::min<int>(IndentAtLineBreak, std::max(0, ContentColumn[i]));
    }
-  IndentAtLineBreak = std::max<unsigned>(IndentAtLineBreak, Decoration.size());
+  IndentAtLineBreak =
+      std::max<unsigned>(IndentAtLineBreak, Decoration.size());
+
    DEBUG({
      llvm::dbgs() << "IndentAtLineBreak " << IndentAtLineBreak << "\n";
      for (size_t i = 0; i < Lines.size(); ++i) {
-      llvm::dbgs() << i << " |" << Lines[i] << "| " << LeadingWhitespace[i]
-                   << "\n";
+      llvm::dbgs() << i << " |" << Content[i] << "| "
+                   << (Content[i].data() - Lines[i].data()) << "\n";
      }
    });
  }
@@ -334,78 +427,157 @@ void BreakableBlockComment::adjustWhitespace(unsigned LineIndex,
  
    StringRef Whitespace = Lines[LineIndex].substr(0, StartOfLine);
    // Adjust Lines to only contain relevant text.
-  Lines[LineIndex - 1] = Lines[LineIndex - 1].substr(0, EndOfPreviousLine);
-  Lines[LineIndex] = Lines[LineIndex].substr(StartOfLine);
-  // Adjust LeadingWhitespace to account all whitespace between the lines
-  // to the current line.
-  LeadingWhitespace[LineIndex] =
-      Lines[LineIndex].begin() - Lines[LineIndex - 1].end();
+  size_t PreviousContentOffset =
+      Content[LineIndex - 1].data() - Lines[LineIndex - 1].data();
+  Content[LineIndex - 1] = Lines[LineIndex - 1].substr(
+      PreviousContentOffset, EndOfPreviousLine - PreviousContentOffset);
+  Content[LineIndex] = Lines[LineIndex].substr(StartOfLine);
  
    // Adjust the start column uniformly across all lines.
-  StartOfLineColumn[LineIndex] =
+  ContentColumn[LineIndex] =
        encoding::columnWidthWithTabs(Whitespace, 0, Style.TabWidth, Encoding) +
        IndentDelta;
  }
  
-unsigned BreakableBlockComment::getLineCount() const { return Lines.size(); }
-
  unsigned BreakableBlockComment::getLineLengthAfterSplit(
-    unsigned LineIndex, unsigned Offset, StringRef::size_type Length) const {
-  unsigned ContentStartColumn = getContentStartColumn(LineIndex, Offset);
-  return ContentStartColumn +
-         encoding::columnWidthWithTabs(Lines[LineIndex].substr(Offset, Length),
-                                       ContentStartColumn, Style.TabWidth,
-                                       Encoding) +
-         // The last line gets a "*/" postfix.
-         (LineIndex + 1 == Lines.size() ? 2 : 0);
-}
-
-BreakableToken::Split
-BreakableBlockComment::getSplit(unsigned LineIndex, unsigned TailOffset,
-                                unsigned ColumnLimit) const {
-  return getCommentSplit(Lines[LineIndex].substr(TailOffset),
-                         getContentStartColumn(LineIndex, TailOffset),
-                         ColumnLimit, Style.TabWidth, Encoding);
+    unsigned LineIndex, unsigned TailOffset,
+    StringRef::size_type Length) const {
+  unsigned ContentStartColumn = getContentStartColumn(LineIndex, TailOffset);
+  unsigned LineLength =
+      ContentStartColumn + encoding::columnWidthWithTabs(
+                               Content[LineIndex].substr(TailOffset, Length),
+                               ContentStartColumn, Style.TabWidth, Encoding);
+  // The last line gets a "*/" postfix.
+  if (LineIndex + 1 == Lines.size()) {
+    LineLength += 2;
+    // We never need a decoration when breaking just the trailing "*/" postfix.
+    // Note that checking that Length == 0 is not enough, since Length could
+    // also be StringRef::npos.
+    if (Content[LineIndex].substr(TailOffset, Length).empty()) {
+      LineLength -= Decoration.size();
+    }
+  }
+  return LineLength;
  }
  
  void BreakableBlockComment::insertBreak(unsigned LineIndex, unsigned TailOffset,
                                          Split Split,
                                          WhitespaceManager &Whitespaces) {
-  StringRef Text = Lines[LineIndex].substr(TailOffset);
+  StringRef Text = Content[LineIndex].substr(TailOffset);
    StringRef Prefix = Decoration;
+  // We need this to account for the case when we have a decoration "* " for all
+  // the lines except for the last one, where the star in "*/" acts as a
+  // decoration.
+  unsigned LocalIndentAtLineBreak = IndentAtLineBreak;
    if (LineIndex + 1 == Lines.size() &&
        Text.size() == Split.first + Split.second) {
      // For the last line we need to break before "*/", but not to add "* ".
      Prefix = "";
+    if (LocalIndentAtLineBreak >= 2)
+      LocalIndentAtLineBreak -= 2;
    }
-
+  // The split offset is from the beginning of the line. Convert it to an offset
+  // from the beginning of the token text.
    unsigned BreakOffsetInToken =
-      Text.data() - Tok.TokenText.data() + Split.first;
+      Text.data() - tokenAt(LineIndex).TokenText.data() + Split.first;
    unsigned CharsToRemove = Split.second;
-  assert(IndentAtLineBreak >= Decoration.size());
+  assert(LocalIndentAtLineBreak >= Prefix.size());
    Whitespaces.replaceWhitespaceInToken(
-      Tok, BreakOffsetInToken, CharsToRemove, "", Prefix, InPPDirective, 1,
-      IndentLevel, IndentAtLineBreak - Decoration.size());
+      tokenAt(LineIndex), BreakOffsetInToken, CharsToRemove, "", Prefix,
+      InPPDirective, /*Newlines=*/1, IndentLevel,
+      /*Spaces=*/LocalIndentAtLineBreak - Prefix.size());
  }
  
-void BreakableBlockComment::replaceWhitespace(unsigned LineIndex,
-                                              unsigned TailOffset, Split Split,
-                                              WhitespaceManager &Whitespaces) {
-  StringRef Text = Lines[LineIndex].substr(TailOffset);
-  unsigned BreakOffsetInToken =
-      Text.data() - Tok.TokenText.data() + Split.first;
-  unsigned CharsToRemove = Split.second;
-  Whitespaces.replaceWhitespaceInToken(
-      Tok, BreakOffsetInToken, CharsToRemove, "", "", /*InPPDirective=*/false,
-      /*Newlines=*/0, /*IndentLevel=*/0, /*Spaces=*/1);
+BreakableToken::Split BreakableBlockComment::getSplitBefore(
+    unsigned LineIndex,
+    unsigned PreviousEndColumn,
+    unsigned ColumnLimit) const {
+  if (!mayReflow(LineIndex))
+    return Split(StringRef::npos, 0);
+  StringRef TrimmedContent = Content[LineIndex].ltrim(Blanks);
+  return getReflowSplit(TrimmedContent, ReflowPrefix, PreviousEndColumn,
+                        ColumnLimit);
+}
+
+unsigned BreakableBlockComment::getReflownColumn(
+    StringRef Content,
+    unsigned LineIndex,
+    unsigned PreviousEndColumn) const {
+    unsigned StartColumn = PreviousEndColumn + ReflowPrefix.size();
+    // If this is the last line, it will carry around its '*/' postfix.
+    unsigned PostfixLength = (LineIndex + 1 == Lines.size() ? 2 : 0);
+    // The line is composed of previous text, reflow prefix, reflown text and
+    // postfix.
+    unsigned ReflownColumn =
+        StartColumn + encoding::columnWidthWithTabs(Content, StartColumn,
+                                                    Style.TabWidth, Encoding) +
+        PostfixLength;
+    return ReflownColumn;
  }
  
+unsigned BreakableBlockComment::getLineLengthAfterSplitBefore(
+    unsigned LineIndex, unsigned TailOffset,
+    unsigned PreviousEndColumn,
+    unsigned ColumnLimit,
+    Split SplitBefore) const {
+    if (SplitBefore.first == StringRef::npos ||
+        SplitBefore.first + SplitBefore.second < Content[LineIndex].size()) {
+      // A piece of line, not the whole, gets reflown.
+      return getLineLengthAfterSplit(LineIndex, TailOffset, StringRef::npos);
+  } else {
+    // The whole line gets reflown, need to check if we need to insert a break
+    // for the postfix or not.
+    StringRef TrimmedContent = Content[LineIndex].ltrim(Blanks);
+    unsigned ReflownColumn =
+        getReflownColumn(TrimmedContent, LineIndex, PreviousEndColumn);
+    if (ReflownColumn <= ColumnLimit) {
+      return ReflownColumn;
+    }
+    return getLineLengthAfterSplit(LineIndex, TailOffset, StringRef::npos);
+  }
+}
  void BreakableBlockComment::replaceWhitespaceBefore(
-    unsigned LineIndex, WhitespaceManager &Whitespaces) {
-  if (LineIndex == 0)
+    unsigned LineIndex, unsigned PreviousEndColumn, unsigned ColumnLimit,
+    Split SplitBefore, WhitespaceManager &Whitespaces) {
+  if (LineIndex == 0) return;
+  StringRef TrimmedContent = Content[LineIndex].ltrim(Blanks);
+  if (SplitBefore.first != StringRef::npos) {
+    // Here we need to reflow.
+    assert(Tokens[LineIndex - 1] == Tokens[LineIndex] &&
+           "Reflowing whitespace within a token");
+    // This is the offset of the end of the last line relative to the start of
+    // the token text in the token.
+    unsigned WhitespaceOffsetInToken = Content[LineIndex - 1].data() +
+        Content[LineIndex - 1].size() -
+        tokenAt(LineIndex).TokenText.data();
+    unsigned WhitespaceLength = TrimmedContent.data() -
+        tokenAt(LineIndex).TokenText.data() -
+        WhitespaceOffsetInToken;
+    Whitespaces.replaceWhitespaceInToken(
+        tokenAt(LineIndex), WhitespaceOffsetInToken,
+        /*ReplaceChars=*/WhitespaceLength,
+        /*PreviousPostfix=*/"",
+        /*CurrentPrefix=*/ReflowPrefix, InPPDirective,
+        /*Newlines=*/0, IndentLevel, /*Spaces=*/0);
+    // Check if we need to also insert a break at the whitespace range.
+    // For this we first adapt the reflow split relative to the beginning of the
+    // content.
+    // Note that we don't need a penalty for this break, since it doesn't change
+    // the total number of lines.
+    Split BreakSplit = SplitBefore;
+    BreakSplit.first += TrimmedContent.data() - Content[LineIndex].data();
+    unsigned ReflownColumn =
+        getReflownColumn(TrimmedContent, LineIndex, PreviousEndColumn);
+    if (ReflownColumn > ColumnLimit) {
+      insertBreak(LineIndex, 0, BreakSplit, Whitespaces);
+    }
      return;
+  }
+
+  // Here no reflow with the previous line will happen.
+  // Fix the decoration of the line at LineIndex.
    StringRef Prefix = Decoration;
-  if (Lines[LineIndex].empty()) {
+  if (Content[LineIndex].empty()) {
      if (LineIndex + 1 == Lines.size()) {
        if (!LastLineNeedsDecoration) {
          // If the last line was empty, we don't need a prefix, as the */ will
@@ -418,19 +590,23 @@ void BreakableBlockComment::replaceWhitespaceBefore(
        Prefix = Prefix.substr(0, 1);
      }
    } else {
-    if (StartOfLineColumn[LineIndex] == 1) {
+    if (ContentColumn[LineIndex] == 1) {
        // This line starts immediately after the decorating *.
        Prefix = Prefix.substr(0, 1);
      }
    }
-
-  unsigned WhitespaceOffsetInToken = Lines[LineIndex].data() -
-                                     Tok.TokenText.data() -
-                                     LeadingWhitespace[LineIndex];
+  // This is the offset of the end of the last line relative to the start of the
+  // token text in the token.
+  unsigned WhitespaceOffsetInToken = Content[LineIndex - 1].data() +
+                                     Content[LineIndex - 1].size() -
+                                     tokenAt(LineIndex).TokenText.data();
+  unsigned WhitespaceLength = Content[LineIndex].data() -
+                              tokenAt(LineIndex).TokenText.data() -
+                              WhitespaceOffsetInToken;
    Whitespaces.replaceWhitespaceInToken(
-      Tok, WhitespaceOffsetInToken, LeadingWhitespace[LineIndex], "", Prefix,
-      InPPDirective, 1, IndentLevel,
-      StartOfLineColumn[LineIndex] - Prefix.size());
+      tokenAt(LineIndex), WhitespaceOffsetInToken, WhitespaceLength, "",
+      Prefix, InPPDirective, /*Newlines=*/1, IndentLevel,
+      ContentColumn[LineIndex] - Prefix.size());
  }
  
  unsigned
@@ -439,7 +615,219 @@ BreakableBlockComment::getContentStartColumn(unsigned LineIndex,
    // If we break, we always break at the predefined indent.
    if (TailOffset != 0)
      return IndentAtLineBreak;
-  return std::max(0, StartOfLineColumn[LineIndex]);
+  return std::max(0, ContentColumn[LineIndex]);
+}
+
+BreakableLineCommentSection::BreakableLineCommentSection(
+    const FormatToken &Token, unsigned IndentLevel, unsigned StartColumn,
+    unsigned OriginalStartColumn, bool FirstInLine, bool InPPDirective,
+    encoding::Encoding Encoding, const FormatStyle &Style)
+    : BreakableComment(Token, IndentLevel, StartColumn, OriginalStartColumn,
+                       FirstInLine, InPPDirective, Encoding, Style) {
+  assert(Tok.is(TT_LineComment) &&
+         "line comment section must start with a line comment");
+  FormatToken *LineTok = nullptr;
+  for (const FormatToken *CurrentTok = &Tok;
+       CurrentTok && CurrentTok->is(TT_LineComment);
+       CurrentTok = CurrentTok->Next) {
+    LastLineTok = LineTok;
+    StringRef TokenText(CurrentTok->TokenText);
+    assert(TokenText.startswith("//"));
+    size_t FirstLineIndex = Lines.size();
+    TokenText.split(Lines, "\n");
+    Content.resize(Lines.size());
+    ContentColumn.resize(Lines.size());
+    OriginalContentColumn.resize(Lines.size());
+    Tokens.resize(Lines.size());
+    Prefix.resize(Lines.size());
+    OriginalPrefix.resize(Lines.size());
+    for (size_t i = FirstLineIndex, e = Lines.size(); i < e; ++i) {
+      StringRef IndentPrefix = getLineCommentIndentPrefix(Lines[i]);
+      OriginalPrefix[i] = Prefix[i] = IndentPrefix;
+      if (Lines[i].size() > Prefix[i].size() &&
+          isAlphanumeric(Lines[i][Prefix[i].size()])) {
+        if (Prefix[i] == "//")
+          Prefix[i] = "// ";
+        else if (Prefix[i] == "///")
+          Prefix[i] = "/// ";
+        else if (Prefix[i] == "//!")
+          Prefix[i] = "//! ";
+      }
+
+      Tokens[i] = LineTok;
+      Content[i] = Lines[i].substr(IndentPrefix.size());
+      OriginalContentColumn[i] =
+          StartColumn +
+          encoding::columnWidthWithTabs(OriginalPrefix[i],
+                                        StartColumn,
+                                        Style.TabWidth,
+                                        Encoding);
+      ContentColumn[i] =
+          StartColumn +
+          encoding::columnWidthWithTabs(Prefix[i],
+                                        StartColumn,
+                                        Style.TabWidth,
+                                        Encoding);
+
+      // Calculate the end of the non-whitespace text in this line.
+      size_t EndOfLine = Content[i].find_last_not_of(Blanks);
+      if (EndOfLine == StringRef::npos)
+        EndOfLine = Content[i].size();
+      else
+        ++EndOfLine;
+      Content[i] = Content[i].substr(0, EndOfLine);
+    }
+    LineTok = CurrentTok->Next;
+  }
+}
+
+unsigned BreakableLineCommentSection::getLineLengthAfterSplit(
+    unsigned LineIndex, unsigned TailOffset,
+    StringRef::size_type Length) const {
+  unsigned ContentStartColumn =
+      (TailOffset == 0 ? ContentColumn[LineIndex]
+                       : OriginalContentColumn[LineIndex]);
+  return ContentStartColumn + encoding::columnWidthWithTabs(
+                                  Content[LineIndex].substr(TailOffset, Length),
+                                  ContentStartColumn, Style.TabWidth, Encoding);
+}
+
+void BreakableLineCommentSection::insertBreak(unsigned LineIndex,
+                                              unsigned TailOffset, Split Split,
+                                              WhitespaceManager &Whitespaces) {
+  StringRef Text = Content[LineIndex].substr(TailOffset);
+  // Compute the offset of the split relative to the beginning of the token
+  // text.
+  unsigned BreakOffsetInToken =
+      Text.data() - tokenAt(LineIndex).TokenText.data() + Split.first;
+  unsigned CharsToRemove = Split.second;
+  // Compute the size of the new indent, including the size of the new prefix of
+  // the newly broken line.
+  unsigned IndentAtLineBreak = OriginalContentColumn[LineIndex] +
+                               Prefix[LineIndex].size() -
+                               OriginalPrefix[LineIndex].size();
+  assert(IndentAtLineBreak >= Prefix[LineIndex].size());
+  Whitespaces.replaceWhitespaceInToken(
+      tokenAt(LineIndex), BreakOffsetInToken, CharsToRemove, "",
+      Prefix[LineIndex], InPPDirective, /*Newlines=*/1, IndentLevel,
+      /*Spaces=*/IndentAtLineBreak - Prefix[LineIndex].size());
+}
+
+BreakableComment::Split BreakableLineCommentSection::getSplitBefore(
+    unsigned LineIndex,
+    unsigned PreviousEndColumn,
+    unsigned ColumnLimit) const {
+  if (!mayReflow(LineIndex)) return Split(StringRef::npos, 0);
+  return getReflowSplit(Content[LineIndex], ReflowPrefix, PreviousEndColumn,
+                        ColumnLimit);
+}
+
+unsigned BreakableLineCommentSection::getLineLengthAfterSplitBefore(
+    unsigned LineIndex, unsigned TailOffset,
+    unsigned PreviousEndColumn,
+    unsigned ColumnLimit,
+    Split SplitBefore) const {
+  if (SplitBefore.first == StringRef::npos ||
+      SplitBefore.first + SplitBefore.second < Content[LineIndex].size()) {
+    // A piece of line, not the whole line, gets reflown.
+    return getLineLengthAfterSplit(LineIndex, TailOffset, StringRef::npos);
+  } else {
+    // The whole line gets reflown.
+    unsigned StartColumn = PreviousEndColumn + ReflowPrefix.size();
+    return StartColumn + encoding::columnWidthWithTabs(Content[LineIndex],
+                                                       StartColumn,
+                                                       Style.TabWidth,
+                                                       Encoding);
+  }
+}
+
+void BreakableLineCommentSection::replaceWhitespaceBefore(
+    unsigned LineIndex, unsigned PreviousEndColumn, unsigned ColumnLimit,
+    Split SplitBefore, WhitespaceManager &Whitespaces) {
+  // If this is the first line of a token, we need to inform Whitespace Manager
+  // about it: either adapt the whitespace range preceding it, or mark it as an
+  // untouchable token.
+  // This happens for instance here:
+  // // line 1 \
+  // // line 2
+  if (LineIndex > 0 && Tokens[LineIndex] != Tokens[LineIndex - 1]) {
+    if (SplitBefore.first != StringRef::npos) {
+      // Reflow happens between tokens. Replace the whitespace between the
+      // tokens by the empty string.
+      Whitespaces.replaceWhitespace(*Tokens[LineIndex],
+                                    /*Newlines=*/0,
+                                    /*IndentLevel=*/IndentLevel,
+                                    /*Spaces=*/0,
+                                    /*StartOfTokenColumn=*/StartColumn,
+                                    /*InPPDirective=*/false);
+      // Replace the indent and prefix of the token with the reflow prefix.
+      unsigned WhitespaceLength =
+          Content[LineIndex].data() - tokenAt(LineIndex).TokenText.data();
+      Whitespaces.replaceWhitespaceInToken(*Tokens[LineIndex],
+                                           /*Offset=*/0,
+                                           /*ReplaceChars=*/WhitespaceLength,
+                                           /*PreviousPostfix=*/"",
+                                           /*CurrentPrefix=*/ReflowPrefix,
+                                           /*InPPDirective=*/false,
+                                           /*Newlines=*/0,
+                                           /*IndentLevel=*/IndentLevel,
+                                           /*Spaces=*/0);
+    } else {
+      // This is the first line for the current token, but no reflow with the
+      // previous token is necessary. However, we still may need to adjust the
+      // start column.
+      unsigned LineColumn =
+          ContentColumn[LineIndex] -
+          (Content[LineIndex].data() - Lines[LineIndex].data());
+      if (tokenAt(LineIndex).OriginalColumn != LineColumn) {
+        Whitespaces.replaceWhitespace(*Tokens[LineIndex],
+                                      /*Newlines=*/1,
+                                      /*IndentLevel=*/IndentLevel,
+                                      /*Spaces=*/LineColumn,
+                                      /*StartOfTokenColumn=*/LineColumn,
+                                      /*InPPDirective=*/false);
+      } else {
+        // The whitespace preceding the first line of this token does not need
+        // to be touched.
+        Whitespaces.addUntouchableToken(tokenAt(LineIndex),
+                                        /*InPPDirective=*/false);
+      }
+    }
+  } else if (OriginalPrefix[LineIndex] != Prefix[LineIndex]) {
+    // This is not the first line of the token. Adjust the prefix if necessary.
+
+    // Take care of the space possibly introduced after a decoration.
+    assert(Prefix[LineIndex] == (OriginalPrefix[LineIndex] + " ").str() &&
+           "Expecting a block comment decoration to differ from original by "
+           "at most a space");
+    Whitespaces.replaceWhitespaceInToken(
+        tokenAt(LineIndex), OriginalPrefix[LineIndex].size(), 0, "", "",
+        /*InPPDirective=*/false,
+        /*Newlines=*/0, /*IndentLevel=*/0,
+        /*Spaces=*/1);
+  }
+  // Add a break after a reflow split has been introduced, if necessary.
+  // Note that this break doesn't need to be penalized, since it doesn't change
+  // the number of lines.
+  if (SplitBefore.first != StringRef::npos &&
+      SplitBefore.first + SplitBefore.second < Content[LineIndex].size()) {
+    insertBreak(LineIndex, 0, SplitBefore, Whitespaces);
+  }
+}
+
+void BreakableLineCommentSection::updateNextToken(LineState& State) const {
+  if (LastLineTok) {
+    State.NextToken = LastLineTok->Next;
+  }
+}
+
+unsigned
+BreakableLineCommentSection::getContentStartColumn(unsigned LineIndex,
+                                                   unsigned TailOffset) const {
+  if (TailOffset != 0) {
+    return OriginalContentColumn[LineIndex];
+  }
+  return ContentColumn[LineIndex];
  }
  
  } // namespace format
diff --git a/lib/Format/BreakableToken.h b/lib/Format/BreakableToken.h

index eb1f9fda307114ff112a4f4d996df75ed42f1e86..848a76ddca4cfe2de4769dec0050822dbd6e467d 100644 (file)
--- a/lib/Format/BreakableToken.h
+++ b/lib/Format/BreakableToken.h
@@ -8,9 +8,10 @@
  //===----------------------------------------------------------------------===//
  ///
  /// \file
-/// \brief Declares BreakableToken, BreakableStringLiteral, and
-/// BreakableBlockComment classes, that contain token type-specific logic to
-/// break long lines in tokens.
+/// \brief Declares BreakableToken, BreakableStringLiteral, BreakableComment,
+/// BreakableBlockComment and BreakableLineCommentSection classes, that contain
+/// token type-specific logic to break long lines in tokens and reflow content
+/// between tokens.
  ///
  //===----------------------------------------------------------------------===//
  
@@ -25,10 +26,43 @@
  namespace clang {
  namespace format {
  
+/// \brief Checks if \p Token switches formatting, like /* clang-format off */.
+/// \p Token must be a comment.
+bool switchesFormatting(const FormatToken &Token);
+
  struct FormatStyle;
  
  /// \brief Base class for strategies on how to break tokens.
  ///
+/// This is organised around the concept of a \c Split, which is a whitespace
+/// range that signifies a position of the content of a token where a
+/// reformatting might be done. Operating with splits is divided into 3
+/// operations:
+/// - getSplit, for finding a split starting at a position,
+/// - getLineLengthAfterSplit, for calculating the size in columns of the rest
+///   of the content after a split has been used for breaking, and
+/// - insertBreak, for executing the split using a whitespace manager.
+///
+/// There is a pair of operations that are used to compress a long whitespace
+/// range with a single space if that will bring the line lenght under the
+/// column limit:
+/// - getLineLengthAfterCompression, for calculating the size in columns of the
+///   line after a whitespace range has been compressed, and
+/// - compressWhitespace, for executing the whitespace compression using a
+///   whitespace manager; note that the compressed whitespace may be in the
+///   middle of the original line and of the reformatted line.
+///
+/// For tokens where the whitespace before each line needs to be also
+/// reformatted, for example for tokens supporting reflow, there are analogous
+/// operations that might be executed before the main line breaking occurs:
+/// - getSplitBefore, for finding a split such that the content preceding it
+///   needs to be specially reflown,
+/// - getLineLengthAfterSplitBefore, for calculating the line length in columns
+///   of the remainder of the content after the beginning of the content has
+///   been reformatted, and
+/// - replaceWhitespaceBefore, for executing the reflow using a whitespace
+///   manager.
+///
  /// FIXME: The interface seems set in stone, so we might want to just pull the
  /// strategy into the class, instead of controlling it from the outside.
  class BreakableToken {
@@ -42,13 +76,13 @@ public:
    virtual unsigned getLineCount() const = 0;
  
    /// \brief Returns the number of columns required to format the piece of line
-  /// at \p LineIndex, from byte offset \p Offset with length \p Length.
+  /// at \p LineIndex, from byte offset \p TailOffset with length \p Length.
    ///
-  /// Note that previous breaks are not taken into account. \p Offset is always
-  /// specified from the start of the (original) line.
+  /// Note that previous breaks are not taken into account. \p TailOffset is
+  /// always specified from the start of the (original) line.
    /// \p Length can be set to StringRef::npos, which means "to the end of line".
    virtual unsigned
-  getLineLengthAfterSplit(unsigned LineIndex, unsigned Offset,
+  getLineLengthAfterSplit(unsigned LineIndex, unsigned TailOffset,
                            StringRef::size_type Length) const = 0;
  
    /// \brief Returns a range (offset, length) at which to break the line at
@@ -61,16 +95,59 @@ public:
    virtual void insertBreak(unsigned LineIndex, unsigned TailOffset, Split Split,
                             WhitespaceManager &Whitespaces) = 0;
  
+  /// \brief Returns the number of columns required to format the piece of line
+  /// at \p LineIndex, from byte offset \p TailOffset after the whitespace range
+  /// \p Split has been compressed into a single space.
+  unsigned getLineLengthAfterCompression(unsigned RemainingTokenColumns,
+                                         Split Split) const;
+
    /// \brief Replaces the whitespace range described by \p Split with a single
    /// space.
-  virtual void replaceWhitespace(unsigned LineIndex, unsigned TailOffset,
-                                 Split Split,
-                                 WhitespaceManager &Whitespaces) = 0;
+  virtual void compressWhitespace(unsigned LineIndex, unsigned TailOffset,
+                                  Split Split,
+                                  WhitespaceManager &Whitespaces) = 0;
+
+  /// \brief Returns a whitespace range (offset, length) of the content at
+  /// \p LineIndex such that the content preceding this range needs to be
+  /// reformatted before any breaks are made to this line.
+  ///
+  /// \p PreviousEndColumn is the end column of the previous line after
+  /// formatting.
+  ///
+  /// A result having offset == StringRef::npos means that no piece of the line
+  /// needs to be reformatted before any breaks are made.
+  virtual Split getSplitBefore(unsigned LineIndex,
+                               unsigned PreviousEndColumn,
+                               unsigned ColumnLimit) const {
+    return Split(StringRef::npos, 0);
+  }
+
+  /// \brief Returns the number of columns required to format the piece of line
+  /// at \p LineIndex after the content preceding the whitespace range specified
+  /// \p SplitBefore has been reformatted, but before any breaks are made to
+  /// this line.
+  virtual unsigned getLineLengthAfterSplitBefore(unsigned LineIndex,
+                                         unsigned TailOffset,
+                                         unsigned PreviousEndColumn,
+                                         unsigned ColumnLimit,
+                                         Split SplitBefore) const {
+    return getLineLengthAfterSplit(LineIndex, TailOffset, StringRef::npos);
+  }
  
    /// \brief Replaces the whitespace between \p LineIndex-1 and \p LineIndex.
+  /// Performs a reformatting of the content at \p LineIndex preceding the
+  /// whitespace range \p SplitBefore.
    virtual void replaceWhitespaceBefore(unsigned LineIndex,
+                                       unsigned PreviousEndColumn,
+                                       unsigned ColumnLimit,
+                                       Split SplitBefore,
                                         WhitespaceManager &Whitespaces) {}
  
+  /// \brief Updates the next token of \p State to the next token after this
+  /// one. This can be used when this token manages a set of underlying tokens
+  /// as a unit and is responsible for the formatting of the them.
+  virtual void updateNextToken(LineState &State) const {}
+
  protected:
    BreakableToken(const FormatToken &Tok, unsigned IndentLevel,
                   bool InPPDirective, encoding::Encoding Encoding,
@@ -126,98 +203,143 @@ public:
                   unsigned ColumnLimit) const override;
    void insertBreak(unsigned LineIndex, unsigned TailOffset, Split Split,
                     WhitespaceManager &Whitespaces) override;
-  void replaceWhitespace(unsigned LineIndex, unsigned TailOffset, Split Split,
-                         WhitespaceManager &Whitespaces) override {}
+  void compressWhitespace(unsigned LineIndex, unsigned TailOffset, Split Split,
+                          WhitespaceManager &Whitespaces) override {}
  };
  
-class BreakableLineComment : public BreakableSingleLineToken {
-public:
-  /// \brief Creates a breakable token for a line comment.
+class BreakableComment : public BreakableToken {
+protected:
+  /// \brief Creates a breakable token for a comment.
    ///
    /// \p StartColumn specifies the column in which the comment will start
-  /// after formatting.
-  BreakableLineComment(const FormatToken &Token, unsigned IndentLevel,
-                       unsigned StartColumn, bool InPPDirective,
-                       encoding::Encoding Encoding, const FormatStyle &Style);
+  /// after formatting, while \p OriginalStartColumn specifies in which
+  /// column the comment started before formatting.
+  /// If the comment starts a line after formatting, set \p FirstInLine to true.
+  BreakableComment(const FormatToken &Token, unsigned IndentLevel,
+                   unsigned StartColumn, unsigned OriginalStartColumn,
+                   bool FirstInLine, bool InPPDirective,
+                   encoding::Encoding Encoding, const FormatStyle &Style);
  
+public:
+  unsigned getLineCount() const override;
    Split getSplit(unsigned LineIndex, unsigned TailOffset,
                   unsigned ColumnLimit) const override;
-  void insertBreak(unsigned LineIndex, unsigned TailOffset, Split Split,
-                   WhitespaceManager &Whitespaces) override;
-  void replaceWhitespace(unsigned LineIndex, unsigned TailOffset, Split Split,
-                         WhitespaceManager &Whitespaces) override;
-  void replaceWhitespaceBefore(unsigned LineIndex,
-                               WhitespaceManager &Whitespaces) override;
+  void compressWhitespace(unsigned LineIndex, unsigned TailOffset, Split Split,
+                          WhitespaceManager &Whitespaces) override;
  
-private:
-  // The prefix without an additional space if one was added.
-  StringRef OriginalPrefix;
+protected:
+  virtual unsigned getContentStartColumn(unsigned LineIndex,
+                                         unsigned TailOffset) const = 0;
+
+  // Returns a split that divides Text into a left and right parts, such that
+  // the left part is suitable for reflowing after PreviousEndColumn.
+  Split getReflowSplit(StringRef Text, StringRef ReflowPrefix,
+                       unsigned PreviousEndColumn, unsigned ColumnLimit) const;
+
+  // Returns the token containing the line at LineIndex.
+  const FormatToken &tokenAt(unsigned LineIndex) const;
+
+  // Checks if the content of line LineIndex may be reflown with the previous
+  // line.
+  bool mayReflow(unsigned LineIndex) const;
+
+  // Contains the original text of the lines of the block comment.
+  //
+  // In case of a block comments, excludes the leading /* in the first line and
+  // trailing */ in the last line. In case of line comments, excludes the
+  // leading // and spaces.
+  SmallVector<StringRef, 16> Lines;
+
+  // Contains the text of the lines excluding all leading and trailing
+  // whitespace between the lines. Note that the decoration (if present) is also
+  // not considered part of the text.
+  SmallVector<StringRef, 16> Content;
+
+  // Tokens[i] contains a reference to the token containing Lines[i] if the
+  // whitespace range before that token is managed by this block.
+  // Otherwise, Tokens[i] is a null pointer.
+  SmallVector<FormatToken *, 16> Tokens;
+
+  // ContentColumn[i] is the target column at which Content[i] should be.
+  // Note that this excludes a leading "* " or "*" in case of block comments
+  // where all lines have a "*" prefix, or the leading "// " or "//" in case of
+  // line comments.
+  //
+  // In block comments, the first line's target column is always positive. The
+  // remaining lines' target columns are relative to the first line to allow
+  // correct indentation of comments in \c WhitespaceManager. Thus they can be
+  // negative as well (in case the first line needs to be unindented more than
+  // there's actual whitespace in another line).
+  SmallVector<int, 16> ContentColumn;
+
+  // The intended start column of the first line of text from this section.
+  unsigned StartColumn;
+
+  // The original start column of the first line of text from this section.
+  unsigned OriginalStartColumn;
+
+  // Whether the first token of this section is the first token in its unwrapped
+  // line.
+  bool FirstInLine;
+
+  // In case of line comments, holds the original prefix, including trailing
+  // whitespace.
+  SmallVector<StringRef, 16> OriginalPrefix;
+
+  // The prefix to use in front a line that has been reflown up.
+  // For example, when reflowing the second line after the first here:
+  // // comment 1
+  // // comment 2
+  // we expect:
+  // // comment 1 comment 2
+  // and not:
+  // // comment 1comment 2
+  StringRef ReflowPrefix = " ";
  };
  
-class BreakableBlockComment : public BreakableToken {
+class BreakableBlockComment : public BreakableComment {
  public:
-  /// \brief Creates a breakable token for a block comment.
-  ///
-  /// \p StartColumn specifies the column in which the comment will start
-  /// after formatting, while \p OriginalStartColumn specifies in which
-  /// column the comment started before formatting.
-  /// If the comment starts a line after formatting, set \p FirstInLine to true.
    BreakableBlockComment(const FormatToken &Token, unsigned IndentLevel,
-                        unsigned StartColumn, unsigned OriginaStartColumn,
+                        unsigned StartColumn, unsigned OriginalStartColumn,
                          bool FirstInLine, bool InPPDirective,
                          encoding::Encoding Encoding, const FormatStyle &Style);
  
-  unsigned getLineCount() const override;
-  unsigned getLineLengthAfterSplit(unsigned LineIndex, unsigned TailOffset,
+  unsigned getLineLengthAfterSplit(unsigned LineIndex,
+                                   unsigned TailOffset,
                                     StringRef::size_type Length) const override;
-  Split getSplit(unsigned LineIndex, unsigned TailOffset,
-                 unsigned ColumnLimit) const override;
    void insertBreak(unsigned LineIndex, unsigned TailOffset, Split Split,
                     WhitespaceManager &Whitespaces) override;
-  void replaceWhitespace(unsigned LineIndex, unsigned TailOffset, Split Split,
-                         WhitespaceManager &Whitespaces) override;
-  void replaceWhitespaceBefore(unsigned LineIndex,
+  Split getSplitBefore(unsigned LineIndex, unsigned PreviousEndColumn,
+                       unsigned ColumnLimit) const override;
+  unsigned getLineLengthAfterSplitBefore(unsigned LineIndex,
+                                         unsigned TailOffset,
+                                         unsigned PreviousEndColumn,
+                                         unsigned ColumnLimit,
+                                         Split SplitBefore) const override;
+  void replaceWhitespaceBefore(unsigned LineIndex, unsigned PreviousEndColumn,
+                               unsigned ColumnLimit,
+                               Split SplitBefore,
                                 WhitespaceManager &Whitespaces) override;
  
  private:
-  // Rearranges the whitespace between Lines[LineIndex-1] and Lines[LineIndex],
-  // so that all whitespace between the lines is accounted to Lines[LineIndex]
-  // as leading whitespace:
-  // - Lines[LineIndex] points to the text after that whitespace
-  // - Lines[LineIndex-1] shrinks by its trailing whitespace
-  // - LeadingWhitespace[LineIndex] is updated with the complete whitespace
-  //   between the end of the text of Lines[LineIndex-1] and Lines[LineIndex]
+  // Rearranges the whitespace between Lines[LineIndex-1] and Lines[LineIndex].
    //
-  // Sets StartOfLineColumn to the intended column in which the text at
+  // Updates Content[LineIndex-1] and Content[LineIndex] by stripping off
+  // leading and trailing whitespace.
+  //
+  // Sets ContentColumn to the intended column in which the text at
    // Lines[LineIndex] starts (note that the decoration, if present, is not
    // considered part of the text).
    void adjustWhitespace(unsigned LineIndex, int IndentDelta);
  
-  // Returns the column at which the text in line LineIndex starts, when broken
-  // at TailOffset. Note that the decoration (if present) is not considered part
-  // of the text.
-  unsigned getContentStartColumn(unsigned LineIndex, unsigned TailOffset) const;
-
-  // Contains the text of the lines of the block comment, excluding the leading
-  // /* in the first line and trailing */ in the last line, and excluding all
-  // trailing whitespace between the lines. Note that the decoration (if
-  // present) is also not considered part of the text.
-  SmallVector<StringRef, 16> Lines;
+  // Computes the end column if the full Content from LineIndex gets reflown
+  // after PreviousEndColumn.
+  unsigned getReflownColumn(StringRef Content,
+                            unsigned LineIndex,
+                            unsigned PreviousEndColumn) const;
  
-  // LeadingWhitespace[i] is the number of characters regarded as whitespace in
-  // front of Lines[i]. Note that this can include "* " sequences, which we
-  // regard as whitespace when all lines have a "*" prefix.
-  SmallVector<unsigned, 16> LeadingWhitespace;
-
-  // StartOfLineColumn[i] is the target column at which Line[i] should be.
-  // Note that this excludes a leading "* " or "*" in case all lines have
-  // a "*" prefix.
-  // The first line's target column is always positive. The remaining lines'
-  // target columns are relative to the first line to allow correct indentation
-  // of comments in \c WhitespaceManager. Thus they can be negative as well (in
-  // case the first line needs to be unindented more than there's actual
-  // whitespace in another line).
-  SmallVector<int, 16> StartOfLineColumn;
+  unsigned getContentStartColumn(unsigned LineIndex,
+                                 unsigned TailOffset) const override;
  
    // The column at which the text of a broken line should start.
    // Note that an optional decoration would go before that column.
@@ -239,6 +361,53 @@ private:
    StringRef Decoration;
  };
  
+class BreakableLineCommentSection : public BreakableComment {
+public:
+  BreakableLineCommentSection(const FormatToken &Token, unsigned IndentLevel,
+                              unsigned StartColumn,
+                              unsigned OriginalStartColumn, bool FirstInLine,
+                              bool InPPDirective, encoding::Encoding Encoding,
+                              const FormatStyle &Style);
+
+  unsigned getLineLengthAfterSplit(unsigned LineIndex,
+                                   unsigned TailOffset,
+                                   StringRef::size_type Length) const override;
+  void insertBreak(unsigned LineIndex, unsigned TailOffset, Split Split,
+                   WhitespaceManager &Whitespaces) override;
+  Split getSplitBefore(unsigned LineIndex, unsigned PreviousEndColumn,
+                       unsigned ColumnLimit) const override;
+  unsigned getLineLengthAfterSplitBefore(unsigned LineIndex, unsigned TailOffset,
+                                         unsigned PreviousEndColumn,
+                                         unsigned ColumnLimit,
+                                         Split SplitBefore) const override;
+  void replaceWhitespaceBefore(unsigned LineIndex, unsigned PreviousEndColumn,
+                               unsigned ColumnLimit, Split SplitBefore,
+                               WhitespaceManager &Whitespaces) override;
+  void updateNextToken(LineState& State) const override;
+
+private:
+  unsigned getContentStartColumn(unsigned LineIndex,
+                                 unsigned TailOffset) const override;
+
+  // Prefix[i] contains the intended leading "//" with trailing spaces to
+  // account for the indentation of content within the comment at line i after
+  // formatting. It can be different than the original prefix when the original
+  // line starts like this:
+  // //content
+  // Then the original prefix is "//", but the prefix is "// ".
+  SmallVector<StringRef, 16> Prefix;
+
+  SmallVector<unsigned, 16> OriginalContentColumn;
+
+  /// \brief The token to which the last line of this breakable token belongs
+  /// to; nullptr if that token is the initial token.
+  ///
+  /// The distinction is because if the token of the last line of this breakable
+  /// token is distinct from the initial token, this breakable token owns the
+  /// whitespace before the token of the last line, and the whitespace manager
+  /// must be able to modify it.
+  FormatToken *LastLineTok = nullptr;
+};
  } // namespace format
  } // namespace clang
  
diff --git a/lib/Format/CMakeLists.txt b/lib/Format/CMakeLists.txt

index c977c2d3c5fa168c3d6cce3b73a2665ce80c14a5..cb46b9f255d2bb1278cec703c95f838c0d86499a 100644 (file)
--- a/lib/Format/CMakeLists.txt
+++ b/lib/Format/CMakeLists.txt
@@ -3,7 +3,6 @@ set(LLVM_LINK_COMPONENTS support)
  add_clang_library(clangFormat
    AffectedRangeManager.cpp
    BreakableToken.cpp
-  Comments.cpp
    ContinuationIndenter.cpp
    Format.cpp
    FormatToken.cpp
diff --git a/lib/Format/Comments.cpp b/lib/Format/Comments.cpp

deleted file mode 100644 (file)

index 1b27f5b..0000000
--- a/lib/Format/Comments.cpp
+++ /dev/null
@@ -1,36 +0,0 @@
-//===--- Comments.cpp - Comment Manipulation  -------------------*- C++ -*-===//
-//
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-///
-/// \file
-/// \brief Implements comment manipulation.
-///
-//===----------------------------------------------------------------------===//
-
-#include "Comments.h"
-
-namespace clang {
-namespace format {
-
-StringRef getLineCommentIndentPrefix(StringRef Comment) {
-  static const char *const KnownPrefixes[] = {"///", "//", "//!"};
-  StringRef LongestPrefix;
-  for (StringRef KnownPrefix : KnownPrefixes) {
-    if (Comment.startswith(KnownPrefix)) {
-      size_t PrefixLength = KnownPrefix.size();
-      while (PrefixLength < Comment.size() && Comment[PrefixLength] == ' ')
-        ++PrefixLength;
-      if (PrefixLength > LongestPrefix.size())
-        LongestPrefix = Comment.substr(0, PrefixLength);
-    }
-  }
-  return LongestPrefix;
-}
-
-} // namespace format
-} // namespace clang
diff --git a/lib/Format/Comments.h b/lib/Format/Comments.h

deleted file mode 100644 (file)

index 59f0596..0000000
--- a/lib/Format/Comments.h
+++ /dev/null
@@ -1,33 +0,0 @@
-//===--- Comments.cpp - Comment manipulation  -----------------------------===//
-//
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-///
-/// \file
-/// \brief Declares comment manipulation functionality.
-///
-//===----------------------------------------------------------------------===//
-
-#ifndef LLVM_CLANG_LIB_FORMAT_COMMENTS_H
-#define LLVM_CLANG_LIB_FORMAT_COMMENTS_H
-
-#include "clang/Basic/LLVM.h"
-#include "llvm/ADT/StringRef.h"
-
-namespace clang {
-namespace format {
-
-/// \brief Returns the comment prefix of the line comment \p Comment.
-///
-/// The comment prefix consists of a leading known prefix, like "//" or "///",
-/// together with the following whitespace.
-StringRef getLineCommentIndentPrefix(StringRef Comment);
-
-} // namespace format
-} // namespace clang
-
-#endif
diff --git a/lib/Format/ContinuationIndenter.cpp b/lib/Format/ContinuationIndenter.cpp

index 778ffe30f6278e07e08fc24d36fd4c95cc43eb6d..7f1ac848f4f945cec3b31fe8bdf39d5fa980f008 100644 (file)
--- a/lib/Format/ContinuationIndenter.cpp
+++ b/lib/Format/ContinuationIndenter.cpp
@@ -20,7 +20,7 @@
  #include "clang/Format/Format.h"
  #include "llvm/Support/Debug.h"
  
-#define DEBUG_TYPE "format-formatter"
+#define DEBUG_TYPE "format-indenter"
  
  namespace clang {
  namespace format {
@@ -1154,7 +1154,11 @@ unsigned ContinuationIndenter::breakProtrudingToken(const FormatToken &Current,
      }
    } else if (Current.is(TT_BlockComment)) {
      if (!Current.isTrailingComment() || !Style.ReflowComments ||
-        CommentPragmasRegex.match(Current.TokenText.substr(2)))
+        CommentPragmasRegex.match(Current.TokenText.substr(2)) ||
+        // If a comment token switches formatting, like
+        // /* clang-format on */, we don't want to break it further,
+        // but we may still want to adjust its indentation.
+        switchesFormatting(Current))
        return addMultilineToken(Current, State);
      Token.reset(new BreakableBlockComment(
          Current, State.Line->Level, StartColumn, Current.OriginalColumn,
@@ -1163,11 +1167,13 @@ unsigned ContinuationIndenter::breakProtrudingToken(const FormatToken &Current,
               (Current.Previous == nullptr ||
                Current.Previous->isNot(TT_ImplicitStringLiteral))) {
      if (!Style.ReflowComments ||
-        CommentPragmasRegex.match(Current.TokenText.substr(2)))
+        CommentPragmasRegex.match(Current.TokenText.substr(2)) ||
+        switchesFormatting(Current))
        return 0;
-    Token.reset(new BreakableLineComment(Current, State.Line->Level,
-                                         StartColumn, /*InPPDirective=*/false,
-                                         Encoding, Style));
+    Token.reset(new BreakableLineCommentSection(
+        Current, State.Line->Level, StartColumn, Current.OriginalColumn,
+        !Current.Previous,
+        /*InPPDirective=*/false, Encoding, Style));
      // We don't insert backslashes when breaking line comments.
      ColumnLimit = Style.ColumnLimit;
    } else {
@@ -1178,15 +1184,27 @@ unsigned ContinuationIndenter::breakProtrudingToken(const FormatToken &Current,
  
    unsigned RemainingSpace = ColumnLimit - Current.UnbreakableTailLength;
    bool BreakInserted = false;
+  // We use a conservative reflowing strategy. Reflow starts after a line is
+  // broken or the corresponding whitespace compressed. Reflow ends as soon as a
+  // line that doesn't get reflown with the previous line is reached.
+  bool ReflowInProgress = false;
    unsigned Penalty = 0;
    unsigned RemainingTokenColumns = 0;
    for (unsigned LineIndex = 0, EndIndex = Token->getLineCount();
         LineIndex != EndIndex; ++LineIndex) {
+    BreakableToken::Split SplitBefore(StringRef::npos, 0);
+    if (ReflowInProgress) {
+      SplitBefore = Token->getSplitBefore(LineIndex, RemainingTokenColumns,
+                                          RemainingSpace);
+    }
+    ReflowInProgress = SplitBefore.first != StringRef::npos;
+    unsigned TailOffset =
+        ReflowInProgress ? (SplitBefore.first + SplitBefore.second) : 0;
      if (!DryRun)
-      Token->replaceWhitespaceBefore(LineIndex, Whitespaces);
-    unsigned TailOffset = 0;
-    RemainingTokenColumns =
-        Token->getLineLengthAfterSplit(LineIndex, TailOffset, StringRef::npos);
+      Token->replaceWhitespaceBefore(LineIndex, RemainingTokenColumns,
+                                     RemainingSpace, SplitBefore, Whitespaces);
+    RemainingTokenColumns = Token->getLineLengthAfterSplitBefore(
+        LineIndex, TailOffset, RemainingTokenColumns, ColumnLimit, SplitBefore);
      while (RemainingTokenColumns > RemainingSpace) {
        BreakableToken::Split Split =
            Token->getSplit(LineIndex, TailOffset, ColumnLimit);
@@ -1198,17 +1216,23 @@ unsigned ContinuationIndenter::breakProtrudingToken(const FormatToken &Current,
          break;
        }
        assert(Split.first != 0);
-      unsigned NewRemainingTokenColumns = Token->getLineLengthAfterSplit(
-          LineIndex, TailOffset + Split.first + Split.second, StringRef::npos);
  
-      // We can remove extra whitespace instead of breaking the line.
-      if (RemainingTokenColumns + 1 - Split.second <= RemainingSpace) {
-        RemainingTokenColumns = 0;
+      // Check if compressing the whitespace range will bring the line length
+      // under the limit. If that is the case, we perform whitespace compression
+      // instead of inserting a line break.
+      unsigned RemainingTokenColumnsAfterCompression =
+          Token->getLineLengthAfterCompression(RemainingTokenColumns, Split);
+      if (RemainingTokenColumnsAfterCompression <= RemainingSpace) {
+        RemainingTokenColumns = RemainingTokenColumnsAfterCompression;
+        ReflowInProgress = true;
          if (!DryRun)
-          Token->replaceWhitespace(LineIndex, TailOffset, Split, Whitespaces);
+          Token->compressWhitespace(LineIndex, TailOffset, Split, Whitespaces);
          break;
        }
  
+      unsigned NewRemainingTokenColumns = Token->getLineLengthAfterSplit(
+          LineIndex, TailOffset + Split.first + Split.second, StringRef::npos);
+
        // When breaking before a tab character, it may be moved by a few columns,
        // but will still be expanded to the next tab stop, so we don't save any
        // columns.
@@ -1226,6 +1250,7 @@ unsigned ContinuationIndenter::breakProtrudingToken(const FormatToken &Current,
        }
        TailOffset += Split.first + Split.second;
        RemainingTokenColumns = NewRemainingTokenColumns;
+      ReflowInProgress = true;
        BreakInserted = true;
      }
    }
@@ -1246,6 +1271,9 @@ unsigned ContinuationIndenter::breakProtrudingToken(const FormatToken &Current,
  
      State.Stack.back().LastSpace = StartColumn;
    }
+
+  Token->updateNextToken(State);
+
    return Penalty;
  }
  
diff --git a/lib/Format/TokenAnnotator.cpp b/lib/Format/TokenAnnotator.cpp

index b5f7de280acd71ddad7fbe4089a5d3e27d19777d..2e8f3ec9205e19c9a11dccdfc690e0e7dec4ed89 100644 (file)
--- a/lib/Format/TokenAnnotator.cpp
+++ b/lib/Format/TokenAnnotator.cpp
@@ -1594,8 +1594,14 @@ void TokenAnnotator::setCommentLineLevels(
    for (SmallVectorImpl<AnnotatedLine *>::reverse_iterator I = Lines.rbegin(),
                                                            E = Lines.rend();
         I != E; ++I) {
-    if (NextNonCommentLine && (*I)->First->is(tok::comment) &&
-        (*I)->First->Next == nullptr)
+    bool CommentLine = (*I)->First;
+    for (const FormatToken *Tok = (*I)->First; Tok; Tok = Tok->Next) {
+      if (!Tok->is(tok::comment)) {
+        CommentLine = false;
+        break;
+      }
+    }
+    if (NextNonCommentLine && CommentLine)
        (*I)->Level = NextNonCommentLine->Level;
      else
        NextNonCommentLine = (*I)->First->isNot(tok::r_brace) ? (*I) : nullptr;
diff --git a/lib/Format/UnwrappedLineParser.cpp b/lib/Format/UnwrappedLineParser.cpp

index 1dcb866a16bda67df5b55bcb32837e951ca94cf6..5e67bde1f241cc24b830c29b8a910cea179b7ac3 100644 (file)
--- a/lib/Format/UnwrappedLineParser.cpp
+++ b/lib/Format/UnwrappedLineParser.cpp
@@ -1998,7 +1998,9 @@ LLVM_ATTRIBUTE_UNUSED static void printDebugInfo(const UnwrappedLine &Line,
    for (std::list<UnwrappedLineNode>::const_iterator I = Line.Tokens.begin(),
                                                      E = Line.Tokens.end();
         I != E; ++I) {
-    llvm::dbgs() << I->Tok->Tok.getName() << "[" << I->Tok->Type << "] ";
+    llvm::dbgs() << I->Tok->Tok.getName() << "["
+                 << "T=" << I->Tok->Type
+                 << ", OC=" << I->Tok->OriginalColumn << "] ";
    }
    for (std::list<UnwrappedLineNode>::const_iterator I = Line.Tokens.begin(),
                                                      E = Line.Tokens.end();
@@ -2038,13 +2040,60 @@ bool UnwrappedLineParser::isOnNewLine(const FormatToken &FormatTok) {
           FormatTok.NewlinesBefore > 0;
  }
  
+static bool isLineComment(const FormatToken &FormatTok) {
+  return FormatTok.is(tok::comment) &&
+         FormatTok.TokenText.startswith("//");
+}
+
+// Checks if \p FormatTok is a line comment that continues the line comment
+// section on \p Line.
+static bool continuesLineComment(const FormatToken &FormatTok,
+                                 const UnwrappedLine &Line) {
+  if (Line.Tokens.empty())
+    return false;
+  const FormatToken &FirstLineTok = *Line.Tokens.front().Tok;
+  // If Line starts with a line comment, then FormatTok continues the comment
+  // section if its original column is greater or equal to the  original start
+  // column of the line.
+  //
+  // If Line starts with a a different token, then FormatTok continues the
+  // comment section if its original column greater than the original start
+  // column of the line.
+  //
+  // For example, the second line comment continues the first in these cases:
+  // // first line
+  // // second line
+  // and:
+  // // first line
+  //  // second line
+  // and:
+  // int i; // first line
+  //  // second line
+  //
+  // The second line comment doesn't continue the first in these cases:
+  //   // first line
+  //  // second line
+  // and:
+  // int i; // first line
+  // // second line
+  unsigned MinContinueColumn =
+      FirstLineTok.OriginalColumn +
+      ((isLineComment(FirstLineTok) && !FirstLineTok.Next) ? 0 : 1);
+  return isLineComment(FormatTok) && FormatTok.NewlinesBefore == 1 &&
+         isLineComment(*(Line.Tokens.back().Tok)) &&
+         FormatTok.OriginalColumn >= MinContinueColumn;
+}
+
  void UnwrappedLineParser::flushComments(bool NewlineBeforeNext) {
    bool JustComments = Line->Tokens.empty();
    for (SmallVectorImpl<FormatToken *>::const_iterator
             I = CommentsBeforeNextToken.begin(),
             E = CommentsBeforeNextToken.end();
         I != E; ++I) {
-    if (isOnNewLine(**I) && JustComments)
+    // Line comments that belong to the same line comment section are put on the
+    // same line since later we might want to reflow content between them.
+    // See BreakableToken.
+    if (isOnNewLine(**I) && JustComments && !continuesLineComment(**I, *Line))
        addUnwrappedLine();
      pushToken(*I);
    }
@@ -2110,7 +2159,8 @@ void UnwrappedLineParser::readToken() {
  
      if (!FormatTok->Tok.is(tok::comment))
        return;
-    if (isOnNewLine(*FormatTok) || FormatTok->IsFirst) {
+    if (!continuesLineComment(*FormatTok, *Line) &&
+        (isOnNewLine(*FormatTok) || FormatTok->IsFirst)) {
        CommentsInCurrentLine = false;
      }
      if (CommentsInCurrentLine) {
diff --git a/lib/Format/WhitespaceManager.cpp b/lib/Format/WhitespaceManager.cpp

index b64506f39035f70681a7f8b0a565a9bd2590baf3..6b14af3f75676ec43f907eaa9a4e73b903587d1e 100644 (file)
--- a/lib/Format/WhitespaceManager.cpp
+++ b/lib/Format/WhitespaceManager.cpp
@@ -127,7 +127,38 @@ void WhitespaceManager::calculateLineBreakInformation() {
      Changes[i - 1].IsTrailingComment =
          (Changes[i].NewlinesBefore > 0 || Changes[i].Kind == tok::eof ||
           (Changes[i].IsInsideToken && Changes[i].Kind == tok::comment)) &&
-        Changes[i - 1].Kind == tok::comment;
+        Changes[i - 1].Kind == tok::comment &&
+        // FIXME: This is a dirty hack. The problem is that
+        // BreakableLineCommentSection does comment reflow changes and here is
+        // the aligning of trailing comments. Consider the case where we reflow
+        // the second line up in this example:
+        // 
+        // // line 1
+        // // line 2
+        // 
+        // That amounts to 2 changes by BreakableLineCommentSection:
+        //  - the first, delimited by (), for the whitespace between the tokens,
+        //  - and second, delimited by [], for the whitespace at the beginning
+        //  of the second token:
+        // 
+        // // line 1(
+        // )[// ]line 2
+        //
+        // So in the end we have two changes like this:
+        //
+        // // line1()[ ]line 2
+        //
+        // Note that the OriginalWhitespaceStart of the second change is the
+        // same as the PreviousOriginalWhitespaceEnd of the first change.
+        // In this case, the below check ensures that the second change doesn't
+        // get treated as a trailing comment change here, since this might
+        // trigger additional whitespace to be wrongly inserted before "line 2"
+        // by the comment aligner here.
+        //
+        // For a proper solution we need a mechanism to say to WhitespaceManager
+        // that a particular change breaks the current sequence of trailing
+        // comments.
+        OriginalWhitespaceStart != PreviousOriginalWhitespaceEnd;
    }
    // FIXME: The last token is currently not always an eof token; in those
    // cases, setting TokenLength of the last token to 0 is wrong.
diff --git a/unittests/Format/FormatTest.cpp b/unittests/Format/FormatTest.cpp

index a7c2f7c9df2ba316591b56f15d0f79ab698a4b26..2be5062d6f62fd19acecedafd6713695e4e60539 100644 (file)
--- a/unittests/Format/FormatTest.cpp
+++ b/unittests/Format/FormatTest.cpp
@@ -1783,6 +1783,463 @@ TEST_F(FormatTest, CommentsInStaticInitializers) {
                 "    0x00, 0x00, 0x00, 0x00};            // comment\n");
  }
  
+TEST_F(FormatTest, ReflowsComments) {
+  // Break a long line and reflow with the full next line.
+  EXPECT_EQ("// long long long\n"
+            "// long long",
+            format("// long long long long\n"
+                   "// long",
+                   getLLVMStyleWithColumns(20)));
+
+  // Keep the trailing newline while reflowing.
+  EXPECT_EQ("// long long long\n"
+            "// long long\n",
+            format("// long long long long\n"
+                   "// long\n",
+                   getLLVMStyleWithColumns(20)));
+
+  // Break a long line and reflow with a part of the next line.
+  EXPECT_EQ("// long long long\n"
+            "// long long\n"
+            "// long_long",
+            format("// long long long long\n"
+                   "// long long_long",
+                   getLLVMStyleWithColumns(20)));
+
+  // Break but do not reflow if the first word from the next line is too long.
+  EXPECT_EQ("// long long long\n"
+            "// long\n"
+            "// long_long_long\n",
+            format("// long long long long\n"
+                   "// long_long_long\n",
+                   getLLVMStyleWithColumns(20)));
+
+  // Don't break or reflow short lines.
+  verifyFormat("// long\n"
+               "// long long long lo\n"
+               "// long long long lo\n"
+               "// long",
+               getLLVMStyleWithColumns(20));
+
+  // Keep prefixes and decorations while reflowing.
+  EXPECT_EQ("/// long long long\n"
+            "/// long long\n",
+            format("/// long long long long\n"
+                   "/// long\n",
+                   getLLVMStyleWithColumns(20)));
+  EXPECT_EQ("//! long long long\n"
+            "//! long long\n",
+            format("//! long long long long\n"
+                   "//! long\n",
+                   getLLVMStyleWithColumns(20)));
+  EXPECT_EQ("/* long long long\n"
+            " * long long */",
+            format("/* long long long long\n"
+                   " * long */",
+                   getLLVMStyleWithColumns(20)));
+
+  // Don't bring leading whitespace up while reflowing.
+  EXPECT_EQ("/*  long long long\n"
+            " * long long long\n"
+            " */",
+            format("/*  long long long long\n"
+                   " *  long long\n"
+                   " */",
+                   getLLVMStyleWithColumns(20)));
+
+  // Reflow the last line of a block comment with its trailing '*/'.
+  EXPECT_EQ("/* long long long\n"
+            "   long long */",
+            format("/* long long long long\n"
+                   "   long */",
+                   getLLVMStyleWithColumns(20)));
+
+  // Reflow two short lines; keep the postfix of the last one.
+  EXPECT_EQ("/* long long long\n"
+            " * long long long */",
+            format("/* long long long long\n"
+                   " * long\n"
+                   " * long */",
+                   getLLVMStyleWithColumns(20)));
+
+  // Put the postfix of the last short reflow line on a newline if it doesn't
+  // fit.
+  EXPECT_EQ("/* long long long\n"
+            " * long long longg\n"
+            " */",
+            format("/* long long long long\n"
+                   " * long\n"
+                   " * longg */",
+                   getLLVMStyleWithColumns(20)));
+
+  // Break single line block comments that are first in the line with ' *'
+  // decoration.
+  EXPECT_EQ("/* long long long\n"
+            " * long */",
+            format("/* long long long long */", getLLVMStyleWithColumns(20)));
+
+  // Break single line block comment that are not first in the line with '  '
+  // decoration.
+  EXPECT_EQ("int i; /* long long\n"
+            "          long */",
+            format("int i; /* long long long */", getLLVMStyleWithColumns(20)));
+
+  // Reflow a line that goes just over the column limit.
+  EXPECT_EQ("// long long long\n"
+            "// lon long",
+            format("// long long long lon\n"
+                   "// long",
+                   getLLVMStyleWithColumns(20)));
+
+  // Stop reflowing if the next line has a different indentation than the
+  // previous line.
+  EXPECT_EQ("// long long long\n"
+            "// long\n"
+            "//  long long\n"
+            "//  long",
+            format("// long long long long\n"
+                   "//  long long\n"
+                   "//  long",
+                   getLLVMStyleWithColumns(20)));
+
+  // Reflow into the last part of a really long line that has been broken into
+  // multiple lines.
+  EXPECT_EQ("// long long long\n"
+            "// long long long\n"
+            "// long long long\n",
+            format("// long long long long long long long long\n"
+                   "// long\n",
+                   getLLVMStyleWithColumns(20)));
+
+  // Break the first line, then reflow the beginning of the second and third
+  // line up.
+  EXPECT_EQ("// long long long\n"
+            "// lon1 lon2 lon2\n"
+            "// lon2 lon3 lon3",
+            format("// long long long lon1\n"
+                   "// lon2 lon2 lon2\n"
+                   "// lon3 lon3",
+                   getLLVMStyleWithColumns(20)));
+
+  // Reflow the beginning of the second line, then break the rest.
+  EXPECT_EQ("// long long long\n"
+            "// lon1 lon2 lon2\n"
+            "// lon2 lon2 lon2\n"
+            "// lon3",
+            format("// long long long lon1\n"
+                   "// lon2 lon2 lon2 lon2 lon2 lon3",
+                   getLLVMStyleWithColumns(20)));
+
+  // Shrink the first line, then reflow the second line up.
+  EXPECT_EQ("// long long long", format("// long              long\n"
+                                        "// long",
+                                        getLLVMStyleWithColumns(20)));
+
+  // Don't shrink leading whitespace.
+  EXPECT_EQ("int i; ///           a",
+            format("int i; ///           a", getLLVMStyleWithColumns(20)));
+
+  // Shrink trailing whitespace if there is no postfix and reflow.
+  EXPECT_EQ("// long long long\n"
+            "// long long",
+            format("// long long long long    \n"
+                   "// long",
+                   getLLVMStyleWithColumns(20)));
+
+  // Shrink trailing whitespace to a single one if there is postfix.
+  EXPECT_EQ("/* long long long */",
+            format("/* long long long     */", getLLVMStyleWithColumns(20)));
+
+  // Break a block comment postfix if exceeding the line limit.
+  EXPECT_EQ("/*               long\n"
+            " */",
+            format("/*               long */", getLLVMStyleWithColumns(20)));
+
+  // Reflow indented comments.
+  EXPECT_EQ("{\n"
+            "  // long long long\n"
+            "  // long long\n"
+            "  int i; /* long lon\n"
+            "            g long\n"
+            "          */\n"
+            "}",
+            format("{\n"
+                   "  // long long long long\n"
+                   "  // long\n"
+                   "  int i; /* long lon g\n"
+                   "            long */\n"
+                   "}",
+                   getLLVMStyleWithColumns(20)));
+
+  // Don't realign trailing comments after reflow has happened.
+  EXPECT_EQ("// long long long\n"
+            "// long long\n"
+            "long i; // long",
+            format("// long long long long\n"
+                   "// long\n"
+                   "long i; // long",
+                   getLLVMStyleWithColumns(20)));
+  EXPECT_EQ("// long long long\n"
+            "// longng long long\n"
+            "// long lo",
+            format("// long long long longng\n"
+                   "// long long long\n"
+                   "// lo",
+                   getLLVMStyleWithColumns(20)));
+
+  // Reflow lines after a broken line.
+  EXPECT_EQ("int a; // Trailing\n"
+            "       // comment on\n"
+            "       // 2 or 3\n"
+            "       // lines.\n",
+            format("int a; // Trailing comment\n"
+                   "       // on 2\n"
+                   "       // or 3\n"
+                   "       // lines.\n",
+                   getLLVMStyleWithColumns(20)));
+  EXPECT_EQ("/// This long line\n"
+            "/// gets reflown.\n",
+            format("/// This long line gets\n"
+                   "/// reflown.\n",
+                   getLLVMStyleWithColumns(20)));
+  EXPECT_EQ("//! This long line\n"
+            "//! gets reflown.\n",
+            format(" //! This long line gets\n"
+                   " //! reflown.\n",
+                   getLLVMStyleWithColumns(20)));
+  EXPECT_EQ("/* This long line\n"
+            " * gets reflown.\n"
+            " */\n",
+            format("/* This long line gets\n"
+                   " * reflown.\n"
+                   " */\n",
+                   getLLVMStyleWithColumns(20)));
+
+  // Reflow after indentation makes a line too long.
+  EXPECT_EQ("{\n"
+            "  // long long long\n"
+            "  // lo long\n"
+            "}\n",
+            format("{\n"
+                   "// long long long lo\n"
+                   "// long\n"
+                   "}\n",
+                   getLLVMStyleWithColumns(20)));
+
+  // Break and reflow multiple lines.
+  EXPECT_EQ("/*\n"
+            " * Reflow the end of\n"
+            " * line by 11 22 33\n"
+            " * 4.\n"
+            " */\n",
+            format("/*\n"
+                   " * Reflow the end of line\n"
+                   " * by\n"
+                   " * 11\n"
+                   " * 22\n"
+                   " * 33\n"
+                   " * 4.\n"
+                   " */\n",
+                   getLLVMStyleWithColumns(20)));
+  EXPECT_EQ("/// First line gets\n"
+            "/// broken. Second\n"
+            "/// line gets\n"
+            "/// reflown and\n"
+            "/// broken. Third\n"
+            "/// gets reflown.\n",
+            format("/// First line gets broken.\n"
+                   "/// Second line gets reflown and broken.\n"
+                   "/// Third gets reflown.\n",
+                   getLLVMStyleWithColumns(20)));
+  EXPECT_EQ("int i; // first long\n"
+            "       // long snd\n"
+            "       // long.\n",
+            format("int i; // first long long\n"
+                   "       // snd long.\n",
+                   getLLVMStyleWithColumns(20)));
+  EXPECT_EQ("{\n"
+            "  // first long line\n"
+            "  // line second\n"
+            "  // long line line\n"
+            "  // third long line\n"
+            "  // line\n"
+            "}\n",
+            format("{\n"
+                   "  // first long line line\n"
+                   "  // second long line line\n"
+                   "  // third long line line\n"
+                   "}\n",
+                   getLLVMStyleWithColumns(20)));
+  EXPECT_EQ("int i; /* first line\n"
+            "        * second\n"
+            "        * line third\n"
+            "        * line\n"
+            "        */",
+            format("int i; /* first line\n"
+                   "        * second line\n"
+                   "        * third line\n"
+                   "        */",
+                   getLLVMStyleWithColumns(20)));
+
+  // Reflow the last two lines of a section that starts with a line having
+  // different indentation.
+  EXPECT_EQ(
+      "//     long\n"
+      "// long long long\n"
+      "// long long",
+      format("//     long\n"
+             "// long long long long\n"
+             "// long",
+             getLLVMStyleWithColumns(20)));
+
+  // Keep the block comment endling '*/' while reflowing.
+  EXPECT_EQ("/* Long long long\n"
+            " * line short */\n",
+            format("/* Long long long line\n"
+                   " * short */\n",
+                   getLLVMStyleWithColumns(20)));
+
+  // Don't reflow between separate blocks of comments.
+  EXPECT_EQ("/* First comment\n"
+            " * block will */\n"
+            "/* Snd\n"
+            " */\n",
+            format("/* First comment block\n"
+                   " * will */\n"
+                   "/* Snd\n"
+                   " */\n",
+                   getLLVMStyleWithColumns(20)));
+
+  // Don't reflow across blank comment lines.
+  EXPECT_EQ("int i; // This long\n"
+            "       // line gets\n"
+            "       // broken.\n"
+            "       //  \n"
+            "       // keep.\n",
+            format("int i; // This long line gets broken.\n"
+                   "       //  \n"
+                   "       // keep.\n",
+                   getLLVMStyleWithColumns(20)));
+  EXPECT_EQ("{\n"
+            "  /// long long long\n"
+            "  /// long long\n"
+            "  ///\n"
+            "  /// long\n"
+            "}",
+            format("{\n"
+                   "  /// long long long long\n"
+                   "  /// long\n"
+                   "  ///\n"
+                   "  /// long\n"
+                   "}",
+                   getLLVMStyleWithColumns(20)));
+  EXPECT_EQ("//! long long long\n"
+            "//! long\n"
+            "\n"
+            "//! long",
+            format("//! long long long long\n"
+                   "\n"
+                   "//! long",
+                   getLLVMStyleWithColumns(20)));
+  EXPECT_EQ("/* long long long\n"
+            "   long\n"
+            "\n"
+            "   long */",
+            format("/* long long long long\n"
+                   "\n"
+                   "   long */",
+                   getLLVMStyleWithColumns(20)));
+  EXPECT_EQ("/* long long long\n"
+            " * long\n"
+            " *\n"
+            " * long */",
+            format("/* long long long long\n"
+                   " *\n"
+                   " * long */",
+                   getLLVMStyleWithColumns(20)));
+  
+  // Don't reflow lines having content that is a single character.
+  EXPECT_EQ("// long long long\n"
+            "// long\n"
+            "// l",
+            format("// long long long long\n"
+                   "// l",
+                   getLLVMStyleWithColumns(20)));
+
+  // Don't reflow lines starting with two punctuation characters.
+  EXPECT_EQ("// long long long\n"
+            "// long\n"
+            "// ... --- ...",
+            format(
+                "// long long long long\n"
+                "// ... --- ...",
+                getLLVMStyleWithColumns(20)));
+  // Reflow lines that have a non-punctuation character among their first 2
+  // characters.
+  EXPECT_EQ("// long long long\n"
+            "// long 'long'",
+            format(
+                "// long long long long\n"
+                "// 'long'",
+                getLLVMStyleWithColumns(20)));
+
+  // Don't reflow between separate blocks of comments.
+  EXPECT_EQ("/* First comment\n"
+            " * block will */\n"
+            "/* Snd\n"
+            " */\n",
+            format("/* First comment block\n"
+                   " * will */\n"
+                   "/* Snd\n"
+                   " */\n",
+                   getLLVMStyleWithColumns(20)));
+
+  // Don't reflow lines having different indentation.
+  EXPECT_EQ("// long long long\n"
+            "// long\n"
+            "//  long",
+            format("// long long long long\n"
+                   "//  long",
+                   getLLVMStyleWithColumns(20)));
+
+  // Don't break or reflow after implicit string literals.
+  verifyFormat("#include <t> // l l l\n"
+               "             // l",
+               getLLVMStyleWithColumns(20));
+
+  // Don't break or reflow comments on import lines.
+  EXPECT_EQ("#include \"t\" /* l l l\n"
+            "                * l */",
+            format("#include \"t\" /* l l l\n"
+                   "                * l */",
+                   getLLVMStyleWithColumns(20)));
+
+  // Don't reflow between different trailing comment sections.
+  EXPECT_EQ("int i; // long long\n"
+            "       // long\n"
+            "int j; // long long\n"
+            "       // long\n",
+            format("int i; // long long long\n"
+                   "int j; // long long long\n",
+                   getLLVMStyleWithColumns(20)));
+
+  // Don't reflow if the first word on the next line is longer than the
+  // available space at current line.
+  EXPECT_EQ("int i; // trigger\n"
+            "       // reflow\n"
+            "       // longsec\n",
+            format("int i; // trigger reflow\n"
+                   "       // longsec\n",
+                   getLLVMStyleWithColumns(20)));
+
+  // Keep empty comment lines.
+  EXPECT_EQ("/**/", format(" /**/", getLLVMStyleWithColumns(20)));
+  EXPECT_EQ("/* */", format(" /* */", getLLVMStyleWithColumns(20)));
+  EXPECT_EQ("/*  */", format(" /*  */", getLLVMStyleWithColumns(20)));
+  EXPECT_EQ("//", format(" //  ", getLLVMStyleWithColumns(20)));
+  EXPECT_EQ("///", format(" ///  ", getLLVMStyleWithColumns(20)));
+}
+
  TEST_F(FormatTest, IgnoresIf0Contents) {
    EXPECT_EQ("#if 0\n"
              "}{)(&*(^%%#%@! fsadj f;ldjs ,:;| <<<>>>][)(][\n"
@@ -7238,11 +7695,9 @@ TEST_F(FormatTest, BlockCommentsAtEndOfLine) {
              format("a = {1111 /*      */\n"
                     "};",
                     getLLVMStyleWithColumns(15)));
-
-  // FIXME: The formatting is still wrong here.
    EXPECT_EQ("a = {\n"
              "    1111 /*      a\n"
-            "            */\n"
+            "          */\n"
              "};",
              format("a = {1111 /*      a */\n"
                     "};",
@@ -10933,6 +11388,26 @@ TEST_F(FormatTest, DisableRegions) {
                     "  int j;\n"
                     " /* clang-format on */\n"
                     "   int   k;"));
+
+  // Don't reflow comments within disabled regions.
+  EXPECT_EQ(
+      "// clang-format off\n"
+      "// long long long long long long line\n"
+      "/* clang-format on */\n"
+      "/* long long long\n"
+      " * long long long\n"
+      " * line */\n"
+      "int i;\n"
+      "/* clang-format off */\n"
+      "/* long long long long long long line */\n",
+      format("// clang-format off\n"
+             "// long long long long long long line\n"
+             "/* clang-format on */\n"
+             "/* long long long long long long line */\n"
+             "int i;\n"
+             "/* clang-format off */\n"
+             "/* long long long long long long line */\n",
+             getLLVMStyleWithColumns(20)));
  }
  
  TEST_F(FormatTest, DoNotCrashOnInvalidInput) {
diff --git a/unittests/Format/FormatTestSelective.cpp b/unittests/Format/FormatTestSelective.cpp

index 2bc60fd1e0d33a226b303da5b7f5c0a468ea27f4..367dbafcaf4cc47437865f58e082f58b5b5e33fc 100644 (file)
--- a/unittests/Format/FormatTestSelective.cpp
+++ b/unittests/Format/FormatTestSelective.cpp
@@ -111,13 +111,19 @@ TEST_F(FormatTestSelective, FormatsCommentsLocally) {
              format("int   a; // comment\n"
                     "int    b; // comment",
                     0, 0));
-  EXPECT_EQ("int   a; // comment\n"
-            "         // line 2\n"
+  EXPECT_EQ("int a; // comment\n"
+            "       // line 2\n"
              "int b;",
              format("int   a; // comment\n"
                     "            // line 2\n"
                     "int b;",
                     28, 0));
+  EXPECT_EQ("int   a; // comment\n"
+            "// comment 2\n"
+            "int b;",
+            format("int   a; // comment\n"
+                   "// comment 2\n"
+                   "int b;", 28, 0));
    EXPECT_EQ("int aaaaaa; // comment\n"
              "int b;\n"
              "int c; // unrelated comment",
author	Krasimir Georgiev <krasimir@google.com>
	Wed, 25 Jan 2017 13:58:58 +0000 (13:58 +0000)
committer	Krasimir Georgiev <krasimir@google.com>
	Wed, 25 Jan 2017 13:58:58 +0000 (13:58 +0000)
lib/Format/BreakableToken.cpp		patch \| blob \| history
lib/Format/BreakableToken.h		patch \| blob \| history
lib/Format/CMakeLists.txt		patch \| blob \| history
lib/Format/Comments.cpp	[deleted file]	patch \| blob \| history
lib/Format/Comments.h	[deleted file]	patch \| blob \| history
lib/Format/ContinuationIndenter.cpp		patch \| blob \| history
lib/Format/TokenAnnotator.cpp		patch \| blob \| history
lib/Format/UnwrappedLineParser.cpp		patch \| blob \| history
lib/Format/WhitespaceManager.cpp		patch \| blob \| history
unittests/Format/FormatTest.cpp		patch \| blob \| history
unittests/Format/FormatTestSelective.cpp		patch \| blob \| history