Major refactoring of BreakableToken.

author Manuel Klimek <klimek@google.com>

Mon, 27 May 2013 15:23:34 +0000 (15:23 +0000)

committer Manuel Klimek <klimek@google.com>

Mon, 27 May 2013 15:23:34 +0000 (15:23 +0000)
author Manuel Klimek <klimek@google.com>
Mon, 27 May 2013 15:23:34 +0000 (15:23 +0000)
committer Manuel Klimek <klimek@google.com>
Mon, 27 May 2013 15:23:34 +0000 (15:23 +0000)
diff --git a/lib/Format/BreakableToken.cpp b/lib/Format/BreakableToken.cpp

index 320913c2d46f096c02c9ac107d5746844d710a6f..1b1827e3f9a9977f95c713664455ead7a642efb5 100644 (file)
--- a/lib/Format/BreakableToken.cpp
+++ b/lib/Format/BreakableToken.cpp
@@ -13,27 +13,82 @@
  ///
  //===----------------------------------------------------------------------===//
  
+#define DEBUG_TYPE "format-token-breaker"
+
  #include "BreakableToken.h"
+#include "clang/Format/Format.h"
  #include "llvm/ADT/STLExtras.h"
+#include "llvm/Support/Debug.h"
  #include <algorithm>
  
  namespace clang {
  namespace format {
+namespace {
+
+// FIXME: Move helper string functions to where it makes sense.
+
+unsigned getOctalLength(StringRef Text) {
+  unsigned I = 1;
+  while (I < Text.size() && I < 4 && (Text[I] >= '0' && Text[I] <= '7')) {
+    ++I;
+  }
+  return I;
+}
+
+unsigned getHexLength(StringRef Text) {
+  unsigned I = 2; // Point after '\x'.
+  while (I < Text.size() && ((Text[I] >= '0' && Text[I] <= '9') ||
+                             (Text[I] >= 'a' && Text[I] <= 'f') ||
+                             (Text[I] >= 'A' && Text[I] <= 'F'))) {
+    ++I;
+  }
+  return I;
+}
+
+unsigned getEscapeSequenceLength(StringRef Text) {
+  assert(Text[0] == '\\');
+  if (Text.size() < 2)
+    return 1;
  
-BreakableToken::Split BreakableComment::getSplit(unsigned LineIndex,
-                                                 unsigned TailOffset,
-                                                 unsigned ColumnLimit) const {
-  StringRef Text = getLine(LineIndex).substr(TailOffset);
-  unsigned ContentStartColumn = getContentStartColumn(LineIndex, TailOffset);
+  switch (Text[1]) {
+  case 'u':
+    return 6;
+  case 'U':
+    return 10;
+  case 'x':
+    return getHexLength(Text);
+  default:
+    if (Text[1] >= '0' && Text[1] <= '7')
+      return getOctalLength(Text);
+    return 2;
+  }
+}
+
+StringRef::size_type getStartOfCharacter(StringRef Text,
+                                         StringRef::size_type Offset) {
+  StringRef::size_type NextEscape = Text.find('\\');
+  while (NextEscape != StringRef::npos && NextEscape < Offset) {
+    StringRef::size_type SequenceLength =
+        getEscapeSequenceLength(Text.substr(NextEscape));
+    if (Offset < NextEscape + SequenceLength)
+      return NextEscape;
+    NextEscape = Text.find('\\', NextEscape + SequenceLength);
+  }
+  return Offset;
+}
+
+BreakableToken::Split getCommentSplit(StringRef Text,
+                                      unsigned ContentStartColumn,
+                                      unsigned ColumnLimit) {
    if (ColumnLimit <= ContentStartColumn + 1)
-    return Split(StringRef::npos, 0);
+    return BreakableToken::Split(StringRef::npos, 0);
  
    unsigned MaxSplit = ColumnLimit - ContentStartColumn + 1;
    StringRef::size_type SpaceOffset = Text.rfind(' ', MaxSplit);
    if (SpaceOffset == StringRef::npos ||
-      Text.find_last_not_of(' ', SpaceOffset) == StringRef::npos) {
+      // Don't break at leading whitespace.
+      Text.find_last_not_of(' ', SpaceOffset) == StringRef::npos)
      SpaceOffset = Text.find(' ', MaxSplit);
-  }
    if (SpaceOffset != StringRef::npos && SpaceOffset != 0) {
      StringRef BeforeCut = Text.substr(0, SpaceOffset).rtrim();
      StringRef AfterCut = Text.substr(SpaceOffset).ltrim();
@@ -43,142 +98,247 @@ BreakableToken::Split BreakableComment::getSplit(unsigned LineIndex,
    return BreakableToken::Split(StringRef::npos, 0);
  }
  
-void BreakableComment::insertBreak(unsigned LineIndex, unsigned TailOffset,
-                                   Split Split, bool InPPDirective,
-                                   WhitespaceManager &Whitespaces) {
-  StringRef Text = getLine(LineIndex).substr(TailOffset);
-  StringRef AdditionalPrefix = Decoration;
-  if (Text.size() == Split.first + Split.second) {
-    // For all but the last line handle trailing space in trimLine.
-    if (LineIndex < Lines.size() - 1)
-      return;
-    // For the last line we need to break before "*/", but not to add "* ".
-    AdditionalPrefix = "";
-  }
+BreakableToken::Split getStringSplit(StringRef Text,
+                                     unsigned ContentStartColumn,
+                                     unsigned ColumnLimit) {
  
-  unsigned BreakOffset = Text.data() - TokenText.data() + Split.first;
-  unsigned CharsToRemove = Split.second;
-  Whitespaces.breakToken(Tok, BreakOffset, CharsToRemove, "", AdditionalPrefix,
-                         InPPDirective, IndentAtLineBreak);
+  if (ColumnLimit <= ContentStartColumn)
+    return BreakableToken::Split(StringRef::npos, 0);
+  unsigned MaxSplit = ColumnLimit - ContentStartColumn;
+  // FIXME: Reduce unit test case.
+  if (Text.empty())
+    return BreakableToken::Split(StringRef::npos, 0);
+  MaxSplit = std::min<unsigned>(MaxSplit, Text.size() - 1);
+  StringRef::size_type SpaceOffset = Text.rfind(' ', MaxSplit);
+  if (SpaceOffset != StringRef::npos && SpaceOffset != 0)
+    return BreakableToken::Split(SpaceOffset + 1, 0);
+  StringRef::size_type SlashOffset = Text.rfind('/', MaxSplit);
+  if (SlashOffset != StringRef::npos && SlashOffset != 0)
+    return BreakableToken::Split(SlashOffset + 1, 0);
+  StringRef::size_type SplitPoint = getStartOfCharacter(Text, MaxSplit);
+  if (SplitPoint == StringRef::npos || SplitPoint == 0)
+    return BreakableToken::Split(StringRef::npos, 0);
+  return BreakableToken::Split(SplitPoint, 0);
  }
  
-BreakableBlockComment::BreakableBlockComment(const SourceManager &SourceMgr,
-                                             const AnnotatedToken &Token,
-                                             unsigned StartColumn)
-    : BreakableComment(SourceMgr, Token.FormatTok, StartColumn + 2) {
-  assert(TokenText.startswith("/*") && TokenText.endswith("*/"));
+} // namespace
+
+unsigned BreakableSingleLineToken::getLineCount() const { return 1; }
+
+unsigned
+BreakableSingleLineToken::getLineLengthAfterSplit(unsigned LineIndex,
+                                                  unsigned TailOffset) const {
+  return StartColumn + Prefix.size() + Postfix.size() + Line.size() -
+         TailOffset;
+}
+
+void BreakableSingleLineToken::insertBreak(unsigned LineIndex,
+                                           unsigned TailOffset, Split Split,
+                                           bool InPPDirective,
+                                           WhitespaceManager &Whitespaces) {
+  Whitespaces.breakToken(Tok, Prefix.size() + TailOffset + Split.first,
+                         Split.second, Postfix, Prefix, InPPDirective,
+                         StartColumn);
+}
+
+BreakableSingleLineToken::BreakableSingleLineToken(const FormatToken &Tok,
+                                                   unsigned StartColumn,
+                                                   StringRef Prefix,
+                                                   StringRef Postfix)
+    : BreakableToken(Tok), StartColumn(StartColumn), Prefix(Prefix),
+      Postfix(Postfix) {
+  assert(Tok.TokenText.startswith(Prefix) && Tok.TokenText.endswith(Postfix));
+  Line = Tok.TokenText.substr(
+      Prefix.size(), Tok.TokenText.size() - Prefix.size() - Postfix.size());
+}
+
+BreakableStringLiteral::BreakableStringLiteral(const FormatToken &Tok,
+                                               unsigned StartColumn)
+    : BreakableSingleLineToken(Tok, StartColumn, "\"", "\"") {}
  
-  OriginalStartColumn =
-      SourceMgr.getSpellingColumnNumber(Tok.getStartOfNonWhitespace()) - 1;
+BreakableToken::Split
+BreakableStringLiteral::getSplit(unsigned LineIndex, unsigned TailOffset,
+                                 unsigned ColumnLimit) const {
+  return getStringSplit(Line.substr(TailOffset), StartColumn + 2, ColumnLimit);
+}
  
+static StringRef getLineCommentPrefix(StringRef Comment) {
+  const char *KnownPrefixes[] = { "/// ", "///", "// ", "//" };
+  for (size_t i = 0, e = llvm::array_lengthof(KnownPrefixes); i != e; ++i)
+    if (Comment.startswith(KnownPrefixes[i]))
+      return KnownPrefixes[i];
+  return "";
+}
+
+BreakableLineComment::BreakableLineComment(const FormatToken &Token,
+                                           unsigned StartColumn)
+    : BreakableSingleLineToken(Token, StartColumn,
+                               getLineCommentPrefix(Token.TokenText), "") {}
+
+BreakableToken::Split
+BreakableLineComment::getSplit(unsigned LineIndex, unsigned TailOffset,
+                               unsigned ColumnLimit) const {
+  return getCommentSplit(Line.substr(TailOffset), StartColumn + Prefix.size(),
+                         ColumnLimit);
+}
+
+BreakableBlockComment::BreakableBlockComment(const FormatStyle &Style,
+                                             const FormatToken &Token,
+                                             unsigned StartColumn,
+                                             unsigned OriginalStartColumn,
+                                             bool FirstInLine)
+    : BreakableToken(Token) {
+  StringRef TokenText(Token.TokenText);
+  assert(TokenText.startswith("/*") && TokenText.endswith("*/"));
    TokenText.substr(2, TokenText.size() - 4).split(Lines, "\n");
  
+  int IndentDelta = StartColumn - OriginalStartColumn;
    bool NeedsStar = true;
-  CommonPrefixLength = UINT_MAX;
-  if (Lines.size() == 1) {
-    if (Token.Parent == 0) {
-      // Standalone block comments will be aligned and prefixed with *s.
-      CommonPrefixLength = OriginalStartColumn + 1;
-    } else {
-      // Trailing comments can start on arbitrary column, and available
-      // horizontal space can be too small to align consecutive lines with
-      // the first one. We could, probably, align them to current
-      // indentation level, but now we just wrap them without indentation
-      // and stars.
-      CommonPrefixLength = 0;
-      NeedsStar = false;
+  LeadingWhitespace.resize(Lines.size());
+  StartOfLineColumn.resize(Lines.size());
+  if (Lines.size() == 1 && !FirstInLine) {
+    // Comments for which FirstInLine is false can start on arbitrary column,
+    // and available horizontal space can be too small to align consecutive
+    // lines with the first one.
+    // FIXME: We could, probably, align them to current indentation level, but
+    // now we just wrap them without stars.
+    NeedsStar = false;
+  }
+  StartOfLineColumn[0] = StartColumn + 2;
+  for (size_t i = 1; i < Lines.size(); ++i) {
+    adjustWhitespace(Style, i, IndentDelta);
+    if (Lines[i].empty())
+      // If the last line is empty, the closing "*/" will have a star.
+      NeedsStar = NeedsStar && i + 1 == Lines.size();
+    else
+      NeedsStar = NeedsStar && Lines[i][0] == '*';
+  }
+  Decoration = NeedsStar ? "* " : "";
+  IndentAtLineBreak = StartOfLineColumn[0] + 1;
+  for (size_t i = 1; i < Lines.size(); ++i) {
+    if (Lines[i].empty()) {
+      if (!NeedsStar && i + 1 != Lines.size())
+        // For all but the last line (which always ends in */), set the
+        // start column to 0 if they're empty, so we do not insert
+        // trailing whitespace anywhere.
+        StartOfLineColumn[i] = 0;
+      continue;
      }
-  } else {
-    for (size_t i = 1; i < Lines.size(); ++i) {
-      size_t FirstNonWhitespace = Lines[i].find_first_not_of(" ");
-      if (FirstNonWhitespace != StringRef::npos) {
-        NeedsStar = NeedsStar && (Lines[i][FirstNonWhitespace] == '*');
-        CommonPrefixLength =
-            std::min<unsigned>(CommonPrefixLength, FirstNonWhitespace);
-      }
+    if (NeedsStar) {
+      // The first line already excludes the star.
+      // For all other lines, adjust the line to exclude the star and
+      // (optionally) the first whitespace.
+      int Offset = Lines[i].startswith("* ") ? 2 : 1;
+      StartOfLineColumn[i] += Offset;
+      Lines[i] = Lines[i].substr(Offset);
+      LeadingWhitespace[i] += Offset;
      }
+    IndentAtLineBreak = std::min<int>(IndentAtLineBreak, StartOfLineColumn[i]);
    }
-  if (CommonPrefixLength == UINT_MAX)
-    CommonPrefixLength = 0;
+  DEBUG({
+    for (size_t i = 0; i < Lines.size(); ++i) {
+      llvm::dbgs() << i << " |" << Lines[i] << "| " << LeadingWhitespace[i]
+                   << "\n";
+    }
+  });
+}
  
-  Decoration = NeedsStar ? "* " : "";
+void BreakableBlockComment::adjustWhitespace(const FormatStyle &Style,
+                                             unsigned LineIndex,
+                                             int IndentDelta) {
+  // Calculate the end of the non-whitespace text in the previous line.
+  size_t EndOfPreviousLine = Lines[LineIndex - 1].find_last_not_of(" \\\t");
+  if (EndOfPreviousLine == StringRef::npos)
+    EndOfPreviousLine = 0;
+  else
+    ++EndOfPreviousLine;
+  // Calculate the start of the non-whitespace text in the current line.
+  size_t StartOfLine = Lines[LineIndex].find_first_not_of(" \t");
+  if (StartOfLine == StringRef::npos)
+    StartOfLine = Lines[LineIndex].size();
+  // FIXME: Tabs are not always 8 characters. Make configurable in the style.
+  unsigned Column = 0;
+  StringRef OriginalIndentText = Lines[LineIndex].substr(0, StartOfLine);
+  for (int i = 0, e = OriginalIndentText.size(); i != e; ++i) {
+    if (Lines[LineIndex][i] == '\t')
+      Column += 8 - (Column % 8);
+    else
+      ++Column;
+  }
  
-  IndentAtLineBreak =
-      std::max<int>(StartColumn - OriginalStartColumn + CommonPrefixLength, 0);
+  // Adjust Lines to only contain relevant text.
+  Lines[LineIndex - 1] = Lines[LineIndex - 1].substr(0, EndOfPreviousLine);
+  Lines[LineIndex] = Lines[LineIndex].substr(StartOfLine);
+  // Adjust LeadingWhitespace to account all whitespace between the lines
+  // to the current line.
+  LeadingWhitespace[LineIndex] =
+      Lines[LineIndex].begin() - Lines[LineIndex - 1].end();
+  // Adjust the start column uniformly accross all lines.
+  StartOfLineColumn[LineIndex] = std::max<int>(0, Column + IndentDelta);
  }
  
-void BreakableBlockComment::alignLines(WhitespaceManager &Whitespaces) {
-  SourceLocation TokenLoc = Tok.getStartOfNonWhitespace();
-  int IndentDelta = (StartColumn - 2) - OriginalStartColumn;
-  if (IndentDelta > 0) {
-    std::string WhiteSpace(IndentDelta, ' ');
-    for (size_t i = 1; i < Lines.size(); ++i) {
-      Whitespaces.addReplacement(
-          TokenLoc.getLocWithOffset(Lines[i].data() - TokenText.data()), 0,
-          WhiteSpace);
-    }
-  } else if (IndentDelta < 0) {
-    std::string WhiteSpace(-IndentDelta, ' ');
-    // Check that the line is indented enough.
-    for (size_t i = 1; i < Lines.size(); ++i) {
-      if (!Lines[i].startswith(WhiteSpace))
-        return;
-    }
-    for (size_t i = 1; i < Lines.size(); ++i) {
-      Whitespaces.addReplacement(
-          TokenLoc.getLocWithOffset(Lines[i].data() - TokenText.data()),
-          -IndentDelta, "");
-    }
-  }
+unsigned BreakableBlockComment::getLineCount() const { return Lines.size(); }
  
-  for (unsigned i = 1; i < Lines.size(); ++i)
-    Lines[i] = Lines[i].substr(CommonPrefixLength + Decoration.size());
+unsigned
+BreakableBlockComment::getLineLengthAfterSplit(unsigned LineIndex,
+                                               unsigned TailOffset) const {
+  return getContentStartColumn(LineIndex, TailOffset) +
+         (Lines[LineIndex].size() - TailOffset) +
+         // The last line gets a "*/" postfix.
+         (LineIndex + 1 == Lines.size() ? 2 : 0);
  }
  
-void BreakableBlockComment::trimLine(unsigned LineIndex, unsigned TailOffset,
-                                     unsigned InPPDirective,
-                                     WhitespaceManager &Whitespaces) {
-  if (LineIndex == Lines.size() - 1)
-    return;
+BreakableToken::Split
+BreakableBlockComment::getSplit(unsigned LineIndex, unsigned TailOffset,
+                                unsigned ColumnLimit) const {
+  return getCommentSplit(Lines[LineIndex].substr(TailOffset),
+                         getContentStartColumn(LineIndex, TailOffset),
+                         ColumnLimit);
+}
+
+void BreakableBlockComment::insertBreak(unsigned LineIndex, unsigned TailOffset,
+                                        Split Split, bool InPPDirective,
+                                        WhitespaceManager &Whitespaces) {
    StringRef Text = Lines[LineIndex].substr(TailOffset);
+  StringRef Prefix = Decoration;
+  if (LineIndex + 1 == Lines.size() &&
+      Text.size() == Split.first + Split.second) {
+    // For the last line we need to break before "*/", but not to add "* ".
+    Prefix = "";
+  }
  
-  // FIXME: The algorithm for trimming a line should naturally yield a
-  // non-change if there is nothing to trim; removing this line breaks the
-  // algorithm; investigate the root cause, and make sure to either document
-  // why exactly this is needed for remove it.
-  if (!Text.endswith(" ") && !InPPDirective)
+  unsigned BreakOffsetInToken =
+      Text.data() - Tok.TokenText.data() + Split.first;
+  unsigned CharsToRemove = Split.second;
+  Whitespaces.breakToken(Tok, BreakOffsetInToken, CharsToRemove, "", Prefix,
+                         InPPDirective, IndentAtLineBreak - Decoration.size());
+}
+
+void
+BreakableBlockComment::replaceWhitespaceBefore(unsigned LineIndex,
+                                               unsigned InPPDirective,
+                                               WhitespaceManager &Whitespaces) {
+  if (LineIndex == 0)
      return;
+  StringRef Prefix = Decoration;
+  if (LineIndex + 1 == Lines.size() && Lines[LineIndex].empty())
+    Prefix = "";
  
-  StringRef TrimmedLine = Text.rtrim();
-  unsigned BreakOffset = TrimmedLine.end() - TokenText.data();
-  unsigned CharsToRemove = Text.size() - TrimmedLine.size() + 1;
-  // FIXME: It seems like we're misusing the call to breakToken to remove
-  // whitespace instead of breaking a token. We should make this an explicit
-  // call option to the WhitespaceManager, or handle trimming and alignment
-  // of comments completely within in the WhitespaceManger. Passing '0' here
-  // and relying on this not breaking assumptions of the WhitespaceManager seems
-  // like a bad idea.
-  Whitespaces.breakToken(Tok, BreakOffset, CharsToRemove, "", "", InPPDirective,
-                         0);
-}
-
-BreakableLineComment::BreakableLineComment(const SourceManager &SourceMgr,
-                                           const AnnotatedToken &Token,
-                                           unsigned StartColumn)
-    : BreakableComment(SourceMgr, Token.FormatTok, StartColumn) {
-  assert(TokenText.startswith("//"));
-  Decoration = getLineCommentPrefix(TokenText);
-  Lines.push_back(TokenText.substr(Decoration.size()));
-  IndentAtLineBreak = StartColumn;
-  this->StartColumn += Decoration.size(); // Start column of the contents.
+  unsigned WhitespaceOffsetInToken =
+      Lines[LineIndex].data() - Tok.TokenText.data() -
+      LeadingWhitespace[LineIndex];
+  Whitespaces.breakToken(
+      Tok, WhitespaceOffsetInToken, LeadingWhitespace[LineIndex], "", Prefix,
+      InPPDirective, StartOfLineColumn[LineIndex] - Prefix.size());
  }
  
-StringRef BreakableLineComment::getLineCommentPrefix(StringRef Comment) {
-  const char *KnownPrefixes[] = { "/// ", "///", "// ", "//" };
-  for (size_t i = 0; i < llvm::array_lengthof(KnownPrefixes); ++i)
-    if (Comment.startswith(KnownPrefixes[i]))
-      return KnownPrefixes[i];
-  return "";
+unsigned
+BreakableBlockComment::getContentStartColumn(unsigned LineIndex,
+                                             unsigned TailOffset) const {
+  // If we break, we always break at the predefined indent.
+  if (TailOffset != 0)
+    return IndentAtLineBreak;
+  return StartOfLineColumn[LineIndex];
  }
  
  } // namespace format
diff --git a/lib/Format/BreakableToken.h b/lib/Format/BreakableToken.h

index c7ba044cf1e06708763d98bba61993fd22b6ca7e..03904c2a4680379546b9be5ed6ea669a3ad4ba47 100644 (file)
--- a/lib/Format/BreakableToken.h
+++ b/lib/Format/BreakableToken.h
@@ -24,215 +24,174 @@
  namespace clang {
  namespace format {
  
+struct FormatStyle;
+
+/// \brief Base class for strategies on how to break tokens.
+///
+/// FIXME: The interface seems set in stone, so we might want to just pull the
+/// strategy into the class, instead of controlling it from the outside.
  class BreakableToken {
  public:
-  BreakableToken(const SourceManager &SourceMgr, const FormatToken &Tok,
-                 unsigned StartColumn)
-      : Tok(Tok), StartColumn(StartColumn),
-        TokenText(SourceMgr.getCharacterData(Tok.getStartOfNonWhitespace()),
-                  Tok.TokenLength) {}
+  // Contains starting character index and length of split.
+  typedef std::pair<StringRef::size_type, unsigned> Split;
+
    virtual ~BreakableToken() {}
+
+  /// \brief Returns the number of lines in this token in the original code.
    virtual unsigned getLineCount() const = 0;
-  virtual unsigned getLineSize(unsigned Index) const = 0;
+
+  /// \brief Returns the rest of the length of the line at \p LineIndex,
+  /// when broken at \p TailOffset.
+  ///
+  /// Note that previous breaks are not taken into account. \p TailOffset
+  /// is always specified from the start of the (original) line.
    virtual unsigned getLineLengthAfterSplit(unsigned LineIndex,
                                             unsigned TailOffset) const = 0;
  
-  // Contains starting character index and length of split.
-  typedef std::pair<StringRef::size_type, unsigned> Split;
+  /// \brief Returns a range (offset, length) at which to break the line at
+  /// \p LineIndex, if previously broken at \p TailOffset. If possible, do not
+  /// violate \p ColumnLimit.
    virtual Split getSplit(unsigned LineIndex, unsigned TailOffset,
                           unsigned ColumnLimit) const = 0;
+
+  /// \brief Emits the previously retrieved \p Split via \p Whitespaces.
    virtual void insertBreak(unsigned LineIndex, unsigned TailOffset, Split Split,
                             bool InPPDirective,
                             WhitespaceManager &Whitespaces) = 0;
-  virtual void trimLine(unsigned LineIndex, unsigned TailOffset,
-                        unsigned InPPDirective,
-                        WhitespaceManager &Whitespaces) {}
+
+  /// \brief Replaces the whitespace between \p LineIndex-1 and \p LineIndex.
+  virtual void replaceWhitespaceBefore(unsigned LineIndex,
+                                       unsigned InPPDirective,
+                                       WhitespaceManager &Whitespaces) {}
+
  protected:
+  BreakableToken(const FormatToken &Tok) : Tok(Tok) {}
+
    const FormatToken &Tok;
-  unsigned StartColumn;
-  StringRef TokenText;
  };
  
-class BreakableStringLiteral : public BreakableToken {
+/// \brief Base class for single line tokens that can be broken.
+///
+/// \c getSplit() needs to be implemented by child classes.
+class BreakableSingleLineToken : public BreakableToken {
  public:
-  BreakableStringLiteral(const SourceManager &SourceMgr, const FormatToken &Tok,
-                         unsigned StartColumn)
-      : BreakableToken(SourceMgr, Tok, StartColumn) {
-    assert(TokenText.startswith("\"") && TokenText.endswith("\""));
-  }
-
-  virtual unsigned getLineCount() const { return 1; }
-
-  virtual unsigned getLineSize(unsigned Index) const {
-    return Tok.TokenLength - 2; // Should be in sync with getLine
-  }
-
+  virtual unsigned getLineCount() const;
    virtual unsigned getLineLengthAfterSplit(unsigned LineIndex,
-                                           unsigned TailOffset) const {
-    return getDecorationLength() + getLine().size() - TailOffset;
-  }
-
-  virtual Split getSplit(unsigned LineIndex, unsigned TailOffset,
-                         unsigned ColumnLimit) const {
-    StringRef Text = getLine().substr(TailOffset);
-    if (ColumnLimit <= getDecorationLength())
-      return Split(StringRef::npos, 0);
-    unsigned MaxSplit = ColumnLimit - getDecorationLength();
-    // FIXME: Reduce unit test case.
-    if (Text.empty())
-      return Split(StringRef::npos, 0);
-    MaxSplit = std::min<unsigned>(MaxSplit, Text.size() - 1);
-    StringRef::size_type SpaceOffset = Text.rfind(' ', MaxSplit);
-    if (SpaceOffset != StringRef::npos && SpaceOffset != 0)
-      return Split(SpaceOffset + 1, 0);
-    StringRef::size_type SlashOffset = Text.rfind('/', MaxSplit);
-    if (SlashOffset != StringRef::npos && SlashOffset != 0)
-      return Split(SlashOffset + 1, 0);
-    StringRef::size_type SplitPoint = getStartOfCharacter(Text, MaxSplit);
-    if (SplitPoint != StringRef::npos && SplitPoint > 1)
-      // Do not split at 0.
-      return Split(SplitPoint, 0);
-    return Split(StringRef::npos, 0);
-  }
-
+                                           unsigned TailOffset) const;
    virtual void insertBreak(unsigned LineIndex, unsigned TailOffset, Split Split,
-                           bool InPPDirective, WhitespaceManager &Whitespaces) {
-    Whitespaces.breakToken(Tok, 1 + TailOffset + Split.first, Split.second,
-                           "\"", "\"", InPPDirective, StartColumn);
-  }
+                           bool InPPDirective, WhitespaceManager &Whitespaces);
  
-private:
-  StringRef getLine() const {
-    // Get string without quotes.
-    // FIXME: Handle string prefixes.
-    return TokenText.substr(1, TokenText.size() - 2);
-  }
-
-  unsigned getDecorationLength() const { return StartColumn + 2; }
-
-  static StringRef::size_type getStartOfCharacter(StringRef Text,
-                                                  StringRef::size_type Offset) {
-    StringRef::size_type NextEscape = Text.find('\\');
-    while (NextEscape != StringRef::npos && NextEscape < Offset) {
-      StringRef::size_type SequenceLength =
-          getEscapeSequenceLength(Text.substr(NextEscape));
-      if (Offset < NextEscape + SequenceLength)
-        return NextEscape;
-      NextEscape = Text.find('\\', NextEscape + SequenceLength);
-    }
-    return Offset;
-  }
-
-  static unsigned getEscapeSequenceLength(StringRef Text) {
-    assert(Text[0] == '\\');
-    if (Text.size() < 2)
-      return 1;
-
-    switch (Text[1]) {
-    case 'u':
-      return 6;
-    case 'U':
-      return 10;
-    case 'x':
-      return getHexLength(Text);
-    default:
-      if (Text[1] >= '0' && Text[1] <= '7')
-        return getOctalLength(Text);
-      return 2;
-    }
-  }
-
-  static unsigned getHexLength(StringRef Text) {
-    unsigned I = 2; // Point after '\x'.
-    while (I < Text.size() && ((Text[I] >= '0' && Text[I] <= '9') ||
-                               (Text[I] >= 'a' && Text[I] <= 'f') ||
-                               (Text[I] >= 'A' && Text[I] <= 'F'))) {
-      ++I;
-    }
-    return I;
-  }
-
-  static unsigned getOctalLength(StringRef Text) {
-    unsigned I = 1;
-    while (I < Text.size() && I < 4 && (Text[I] >= '0' && Text[I] <= '7')) {
-      ++I;
-    }
-    return I;
-  }
+protected:
+  BreakableSingleLineToken(const FormatToken &Tok, unsigned StartColumn,
+                           StringRef Prefix, StringRef Postfix);
  
+  // The column in which the token starts.
+  unsigned StartColumn;
+  // The prefix a line needs after a break in the token.
+  StringRef Prefix;
+  // The postfix a line needs before introducing a break.
+  StringRef Postfix;
+  // The token text excluding the prefix and postfix.
+  StringRef Line;
  };
  
-class BreakableComment : public BreakableToken {
+class BreakableStringLiteral : public BreakableSingleLineToken {
  public:
-  virtual unsigned getLineSize(unsigned Index) const {
-    return getLine(Index).size();
-  }
-
-  virtual unsigned getLineCount() const { return Lines.size(); }
-
-  virtual unsigned getLineLengthAfterSplit(unsigned LineIndex,
-                                           unsigned TailOffset) const {
-    return getContentStartColumn(LineIndex, TailOffset) +
-           getLine(LineIndex).size() - TailOffset;
-  }
+  /// \brief Creates a breakable token for a single line string literal.
+  ///
+  /// \p StartColumn specifies the column in which the token will start
+  /// after formatting.
+  BreakableStringLiteral(const FormatToken &Tok, unsigned StartColumn);
  
    virtual Split getSplit(unsigned LineIndex, unsigned TailOffset,
                           unsigned ColumnLimit) const;
-  virtual void insertBreak(unsigned LineIndex, unsigned TailOffset, Split Split,
-                           bool InPPDirective, WhitespaceManager &Whitespaces);
-
-protected:
-  BreakableComment(const SourceManager &SourceMgr, const FormatToken &Tok,
-                   unsigned StartColumn)
-      : BreakableToken(SourceMgr, Tok, StartColumn) {}
-
-  // Get comment lines without /* */, common prefix and trailing whitespace.
-  // Last line is not trimmed, as it is terminated by */, so its trailing
-  // whitespace is not really trailing.
-  StringRef getLine(unsigned Index) const {
-    return Index < Lines.size() - 1 ? Lines[Index].rtrim() : Lines[Index];
-  }
-
-  unsigned getContentStartColumn(unsigned LineIndex,
-                                 unsigned TailOffset) const {
-    return (TailOffset == 0 && LineIndex == 0)
-               ? StartColumn
-               : IndentAtLineBreak + Decoration.size();
-  }
-
-  unsigned IndentAtLineBreak;
-  StringRef Decoration;
-  SmallVector<StringRef, 16> Lines;
  };
  
-class BreakableBlockComment : public BreakableComment {
+class BreakableLineComment : public BreakableSingleLineToken {
  public:
-  BreakableBlockComment(const SourceManager &SourceMgr,
-                        const AnnotatedToken &Token, unsigned StartColumn);
+  /// \brief Creates a breakable token for a line comment.
+  ///
+  /// \p StartColumn specifies the column in which the comment will start
+  /// after formatting.
+  BreakableLineComment(const FormatToken &Token, unsigned StartColumn);
  
-  void alignLines(WhitespaceManager &Whitespaces);
+  virtual Split getSplit(unsigned LineIndex, unsigned TailOffset,
+                         unsigned ColumnLimit) const;
+};
  
+class BreakableBlockComment : public BreakableToken {
+public:
+  /// \brief Creates a breakable token for a block comment.
+  ///
+  /// \p StartColumn specifies the column in which the comment will start
+  /// after formatting, while \p OriginalStartColumn specifies in which
+  /// column the comment started before formatting.
+  /// If the comment starts a line after formatting, set \p FirstInLine to true.
+  BreakableBlockComment(const FormatStyle &Style, const FormatToken &Token,
+                        unsigned StartColumn, unsigned OriginaStartColumn,
+                        bool FirstInLine);
+
+  virtual unsigned getLineCount() const;
    virtual unsigned getLineLengthAfterSplit(unsigned LineIndex,
-                                           unsigned TailOffset) const {
-    return BreakableComment::getLineLengthAfterSplit(LineIndex, TailOffset) +
-           (LineIndex + 1 < Lines.size() ? 0 : 2);
-  }
-
-  virtual void trimLine(unsigned LineIndex, unsigned TailOffset,
-                        unsigned InPPDirective, WhitespaceManager &Whitespaces);
+                                           unsigned TailOffset) const;
+  virtual Split getSplit(unsigned LineIndex, unsigned TailOffset,
+                         unsigned ColumnLimit) const;
+  virtual void insertBreak(unsigned LineIndex, unsigned TailOffset, Split Split,
+                           bool InPPDirective, WhitespaceManager &Whitespaces);
+  virtual void replaceWhitespaceBefore(unsigned LineIndex,
+                                       unsigned InPPDirective,
+                                       WhitespaceManager &Whitespaces);
  
  private:
-  unsigned OriginalStartColumn;
-  unsigned CommonPrefixLength;
-};
+  // Rearranges the whitespace between Lines[LineIndex-1] and Lines[LineIndex],
+  // so that all whitespace between the lines is accounted to Lines[LineIndex]
+  // as leading whitespace:
+  // - Lines[LineIndex] points to the text after that whitespace
+  // - Lines[LineIndex-1] shrinks by its trailing whitespace
+  // - LeadingWhitespace[LineIndex] is updated with the complete whitespace
+  //   between the end of the text of Lines[LineIndex-1] and Lines[LineIndex]
+  //
+  // Sets StartOfLineColumn to the intended column in which the text at
+  // Lines[LineIndex] starts (note that the decoration, if present, is not
+  // considered part of the text).
+  void adjustWhitespace(const FormatStyle &Style, unsigned LineIndex,
+                        int IndentDelta);
+
+  // Returns the column at which the text in line LineIndex starts, when broken
+  // at TailOffset. Note that the decoration (if present) is not considered part
+  // of the text.
+  unsigned getContentStartColumn(unsigned LineIndex, unsigned TailOffset) const;
+
+  // Contains the text of the lines of the block comment, excluding the leading
+  // /* in the first line and trailing */ in the last line, and excluding all
+  // trailing whitespace between the lines. Note that the decoration (if
+  // present) is also not considered part of the text.
+  SmallVector<StringRef, 16> Lines;
  
-class BreakableLineComment : public BreakableComment {
-public:
-  BreakableLineComment(const SourceManager &SourceMgr,
-                       const AnnotatedToken &Token, unsigned StartColumn);
+  // LeadingWhitespace[i] is the number of characters regarded as whitespace in
+  // front of Lines[i]. Note that this can include "* " sequences, which we
+  // regard as whitespace when all lines have a "*" prefix.
+  SmallVector<unsigned, 16> LeadingWhitespace;
+
+  // StartOfLineColumn[i] is the target column at which Line[i] should be.
+  // Note that this excludes a leading "* " or "*" in case all lines have
+  // a "*" prefix.
+  SmallVector<unsigned, 16> StartOfLineColumn;
+
+  // The column at which the text of a broken line should start.
+  // Note that an optional decoration would go before that column.
+  // IndentAtLineBreak is a uniform position for all lines in a block comment,
+  // regardless of their relative position.
+  // FIXME: Revisit the decision to do this; the main reason was to support
+  // patterns like
+  // /**************//**
+  //  * Comment
+  // We could also support such patterns by special casing the first line
+  // instead.
+  unsigned IndentAtLineBreak;
  
-private:
-  static StringRef getLineCommentPrefix(StringRef Comment);
+  // Either "* " if all lines begin with a "*", or empty.
+  StringRef Decoration;
  };
  
  } // namespace format
diff --git a/lib/Format/Format.cpp b/lib/Format/Format.cpp

index b4ea90462a83f613742ffa1b1de6b43cb06dcfec..ea8a0f9f7d10386f1ad3e67a76d93483026c758c 100644 (file)
--- a/lib/Format/Format.cpp
+++ b/lib/Format/Format.cpp
@@ -802,6 +802,9 @@ private:
      unsigned UnbreakableTailLength = Current.UnbreakableTailLength;
      llvm::OwningPtr<BreakableToken> Token;
      unsigned StartColumn = State.Column - Current.FormatTok.TokenLength;
+    unsigned OriginalStartColumn = SourceMgr.getSpellingColumnNumber(
+        Current.FormatTok.getStartOfNonWhitespace()) - 1;
+
      if (Current.is(tok::string_literal) &&
          Current.Type != TT_ImplicitStringLiteral) {
        // Only break up default narrow strings.
@@ -810,18 +813,16 @@ private:
        if (!LiteralData || *LiteralData != '"')
          return 0;
  
-      Token.reset(new BreakableStringLiteral(SourceMgr, Current.FormatTok,
-                                             StartColumn));
+      Token.reset(new BreakableStringLiteral(Current.FormatTok, StartColumn));
      } else if (Current.Type == TT_BlockComment) {
        BreakableBlockComment *BBC =
-          new BreakableBlockComment(SourceMgr, Current, StartColumn);
-      if (!DryRun)
-        BBC->alignLines(Whitespaces);
+          new BreakableBlockComment(Style, Current.FormatTok, StartColumn,
+                                    OriginalStartColumn, !Current.Parent);
        Token.reset(BBC);
      } else if (Current.Type == TT_LineComment &&
                 (Current.Parent == NULL ||
                  Current.Parent->Type != TT_ImplicitStringLiteral)) {
-      Token.reset(new BreakableLineComment(SourceMgr, Current, StartColumn));
+      Token.reset(new BreakableLineComment(Current.FormatTok, StartColumn));
      } else {
        return 0;
      }
@@ -832,8 +833,12 @@ private:
      bool BreakInserted = false;
      unsigned Penalty = 0;
      unsigned PositionAfterLastLineInToken = 0;
-    for (unsigned LineIndex = 0; LineIndex < Token->getLineCount();
-         ++LineIndex) {
+    for (unsigned LineIndex = 0, EndIndex = Token->getLineCount();
+         LineIndex != EndIndex; ++LineIndex) {
+      if (!DryRun) {
+        Token->replaceWhitespaceBefore(LineIndex, Line.InPPDirective,
+                                       Whitespaces);
+      }
        unsigned TailOffset = 0;
        unsigned RemainingTokenLength =
            Token->getLineLengthAfterSplit(LineIndex, TailOffset);
@@ -845,8 +850,7 @@ private:
          assert(Split.first != 0);
          unsigned NewRemainingTokenLength = Token->getLineLengthAfterSplit(
              LineIndex, TailOffset + Split.first + Split.second);
-        if (NewRemainingTokenLength >= RemainingTokenLength)
-          break;
+        assert(NewRemainingTokenLength < RemainingTokenLength);
          if (!DryRun) {
            Token->insertBreak(LineIndex, TailOffset, Split, Line.InPPDirective,
                               Whitespaces);
@@ -857,9 +861,6 @@ private:
          BreakInserted = true;
        }
        PositionAfterLastLineInToken = RemainingTokenLength;
-      if (!DryRun) {
-        Token->trimLine(LineIndex, TailOffset, Line.InPPDirective, Whitespaces);
-      }
      }
  
      if (BreakInserted) {
@@ -1084,8 +1085,8 @@ private:
  class LexerBasedFormatTokenSource : public FormatTokenSource {
  public:
    LexerBasedFormatTokenSource(Lexer &Lex, SourceManager &SourceMgr)
-      : GreaterStashed(false), Lex(Lex), SourceMgr(SourceMgr),
-        IdentTable(Lex.getLangOpts()) {
+      : GreaterStashed(false), TrailingWhitespace(0), Lex(Lex),
+        SourceMgr(SourceMgr), IdentTable(Lex.getLangOpts()) {
      Lex.SetKeepWhitespaceMode(true);
    }
  
@@ -1102,12 +1103,13 @@ public:
      FormatTok = FormatToken();
      Lex.LexFromRawLexer(FormatTok.Tok);
      StringRef Text = rawTokenText(FormatTok.Tok);
-    SourceLocation WhitespaceStart = FormatTok.Tok.getLocation();
+    SourceLocation WhitespaceStart =
+        FormatTok.Tok.getLocation().getLocWithOffset(-TrailingWhitespace);
      if (SourceMgr.getFileOffset(WhitespaceStart) == 0)
        FormatTok.IsFirst = true;
  
      // Consume and record whitespace until we find a significant token.
-    unsigned WhitespaceLength = 0;
+    unsigned WhitespaceLength = TrailingWhitespace;
      while (FormatTok.Tok.is(tok::unknown)) {
        unsigned Newlines = Text.count('\n');
        if (Newlines > 0)
@@ -1130,9 +1132,10 @@ public:
      // Now FormatTok is the next non-whitespace token.
      FormatTok.TokenLength = Text.size();
  
+    TrailingWhitespace = 0;
      if (FormatTok.Tok.is(tok::comment)) {
-      FormatTok.TrailingWhiteSpaceLength = Text.size() - Text.rtrim().size();
-      FormatTok.TokenLength -= FormatTok.TrailingWhiteSpaceLength;
+      TrailingWhitespace = Text.size() - Text.rtrim().size();
+      FormatTok.TokenLength -= TrailingWhitespace;
      }
  
      // In case the token starts with escaped newlines, we want to
@@ -1163,6 +1166,9 @@ public:
  
      FormatTok.WhitespaceRange = SourceRange(
          WhitespaceStart, WhitespaceStart.getLocWithOffset(WhitespaceLength));
+    FormatTok.TokenText = StringRef(
+        SourceMgr.getCharacterData(FormatTok.getStartOfNonWhitespace()),
+        FormatTok.TokenLength);
      return FormatTok;
    }
  
@@ -1171,6 +1177,7 @@ public:
  private:
    FormatToken FormatTok;
    bool GreaterStashed;
+  unsigned TrailingWhitespace;
    Lexer &Lex;
    SourceManager &SourceMgr;
    IdentifierTable IdentTable;
diff --git a/lib/Format/UnwrappedLineParser.h b/lib/Format/UnwrappedLineParser.h

index c738ef13c6d41e8edeb5f65d669fbfe192ef24c7..3359745a9ff2efeb38b0c32796552d17433083b8 100644 (file)
--- a/lib/Format/UnwrappedLineParser.h
+++ b/lib/Format/UnwrappedLineParser.h
@@ -28,9 +28,8 @@ namespace format {
  /// whitespace characters preceeding it.
  struct FormatToken {
    FormatToken()
-      : NewlinesBefore(0), HasUnescapedNewline(false),
-        LastNewlineOffset(0), TokenLength(0), IsFirst(false),
-        MustBreakBefore(false), TrailingWhiteSpaceLength(0) {}
+      : NewlinesBefore(0), HasUnescapedNewline(false), LastNewlineOffset(0),
+        TokenLength(0), IsFirst(false), MustBreakBefore(false) {}
  
    /// \brief The \c Token.
    Token Tok;
@@ -66,9 +65,6 @@ struct FormatToken {
    /// before the token.
    bool MustBreakBefore;
  
-  /// \brief Number of characters of trailing whitespace.
-  unsigned TrailingWhiteSpaceLength;
-
    /// \brief Returns actual token start location without leading escaped
    /// newlines and whitespace.
    ///
@@ -77,6 +73,12 @@ struct FormatToken {
    SourceLocation getStartOfNonWhitespace() const {
      return WhitespaceRange.getEnd();
    }
+
+  /// \brief The raw text of the token.
+  ///
+  /// Contains the raw token text without leading whitespace and without leading
+  /// escaped newlines.
+  StringRef TokenText;
  };
  
  /// \brief An unwrapped line is a sequence of \c Token, that we would like to
diff --git a/lib/Format/WhitespaceManager.cpp b/lib/Format/WhitespaceManager.cpp

index f7b65fc72cf2da362b333e3c0c80505cac003ef1..daf63697b0ce07bde3e139d1b30dc57506c6df66 100644 (file)
--- a/lib/Format/WhitespaceManager.cpp
+++ b/lib/Format/WhitespaceManager.cpp
@@ -46,19 +46,6 @@ void WhitespaceManager::replaceWhitespace(const AnnotatedToken &Tok,
        true, Tok.FormatTok.WhitespaceRange,
        Spaces, StartOfTokenColumn, Newlines, "", "", Tok.FormatTok.Tok.getKind(),
        InPPDirective && !Tok.FormatTok.IsFirst));
-
-  // Align line comments if they are trailing or if they continue other
-  // trailing comments.
-  // FIXME: Pull this out and generalize so it works the same way in broken
-  // comments and unbroken comments with trailing whitespace.
-  if (Tok.isTrailingComment()) {
-    SourceLocation TokenEndLoc = Tok.FormatTok.getStartOfNonWhitespace()
-        .getLocWithOffset(Tok.FormatTok.TokenLength);
-    // Remove the comment's trailing whitespace.
-    if (Tok.FormatTok.TrailingWhiteSpaceLength != 0)
-      Replaces.insert(tooling::Replacement(
-          SourceMgr, TokenEndLoc, Tok.FormatTok.TrailingWhiteSpaceLength, ""));
-  }
  }
  
  void WhitespaceManager::addUntouchableToken(const FormatToken &Tok,
@@ -86,12 +73,6 @@ void WhitespaceManager::breakToken(const FormatToken &Tok, unsigned Offset,
        tok::unknown, InPPDirective && !Tok.IsFirst));
  }
  
-void WhitespaceManager::addReplacement(const SourceLocation &SourceLoc,
-                                       unsigned ReplaceChars, StringRef Text) {
-  Replaces.insert(
-      tooling::Replacement(SourceMgr, SourceLoc, ReplaceChars, Text));
-}
-
  const tooling::Replacements &WhitespaceManager::generateReplacements() {
    if (Changes.empty())
      return Replaces;
diff --git a/lib/Format/WhitespaceManager.h b/lib/Format/WhitespaceManager.h

index 701ecff4e055311da8adce6255ec833c1df2af21..cdd8f1d10f3f380ee5ac85f398886fd0d70c6f63 100644 (file)
--- a/lib/Format/WhitespaceManager.h
+++ b/lib/Format/WhitespaceManager.h
@@ -67,14 +67,6 @@ public:
    /// \brief Returns all the \c Replacements created during formatting.
    const tooling::Replacements &generateReplacements();
  
-  /// \brief Replaces \p ReplaceChars after \p SourceLoc with \p Text.
-  ///
-  /// FIXME: This is currently used to align comments outside of the \c
-  /// WhitespaceManager. Once this has been moved inside, get rid of this
-  /// method.
-  void addReplacement(const SourceLocation &SourceLoc, unsigned ReplaceChars,
-                      StringRef Text);
-
  private:
    /// \brief Represents a change before a token, a break inside a token,
    /// or the layout of an unchanged token (or whitespace within).
diff --git a/unittests/Format/FormatTest.cpp b/unittests/Format/FormatTest.cpp

index 73fea60519fe4a88bfd4fc908b53641ef00793f6..d237c0b2f865ba64ae40bba30b8a0cb3e8ecb855 100644 (file)
--- a/unittests/Format/FormatTest.cpp
+++ b/unittests/Format/FormatTest.cpp
@@ -766,8 +766,8 @@ TEST_F(FormatTest, AlignsMultiLineComments) {
                     "           1.1.1. to keep the formatting.\n"
                     "   */"));
    EXPECT_EQ("/*\n"
-            " Don't try to outdent if there's not enough inentation.\n"
-            " */",
+            "Don't try to outdent if there's not enough inentation.\n"
+            "*/",
              format("  /*\n"
                     " Don't try to outdent if there's not enough inentation.\n"
                     " */"));
@@ -808,6 +808,65 @@ TEST_F(FormatTest, SplitsLongCxxComments) {
              format("// A comment before a macro definition\n"
                     "#define a b",
                     getLLVMStyleWithColumns(20)));
+
+  EXPECT_EQ("/* A comment before\n"
+            " * a macro\n"
+            " * definition */\n"
+            "#define a b",
+            format("/* A comment before a macro definition */\n"
+                   "#define a b",
+                   getLLVMStyleWithColumns(20)));
+
+  EXPECT_EQ("/* some comment\n"
+            "     *   a comment\n"
+            "* that we break\n"
+            " * another comment\n"
+            "* we have to break\n"
+            "* a left comment\n"
+            " */",
+            format("  /* some comment\n"
+                   "       *   a comment that we break\n"
+                   "   * another comment we have to break\n"
+                   "* a left comment\n"
+                   "   */",
+                   getLLVMStyleWithColumns(20)));
+
+  EXPECT_EQ("/*\n"
+            "\n"
+            "\n"
+            "    */\n",
+            format("  /*       \n"
+                   "      \n"
+                   "               \n"
+                   "      */\n"));
+}
+
+TEST_F(FormatTest, MultiLineCommentsInDefines) {
+  // FIXME: The line breaks are still suboptimal (current guess
+  // is that this is due to the token length being misused), but
+  // the comment handling is correct.
+  EXPECT_EQ("#define A(      \\\n"
+            "    x) /*       \\\n"
+            "a comment       \\\n"
+            "inside */       \\\n"
+            "  f();",
+            format("#define A(x) /* \\\n"
+                   "  a comment     \\\n"
+                   "  inside */     \\\n"
+                   "  f();",
+                   getLLVMStyleWithColumns(17)));
+  EXPECT_EQ("#define A(x) /* \\\n"
+            "        a       \\\n"
+            "        comment \\\n"
+            "        inside  \\\n"
+            "        */      \\\n"
+            "  f();",
+            format("#define A(      \\\n"
+                   "    x) /*       \\\n"
+                   "  a comment     \\\n"
+                   "  inside */     \\\n"
+                   "  f();",
+                   getLLVMStyleWithColumns(17)));
  }
  
  TEST_F(FormatTest, ParsesCommentsAdjacentToPPDirectives) {
@@ -928,7 +987,8 @@ TEST_F(FormatTest, SplitsLongLinesInComments) {
                     "    */", getLLVMStyleWithColumns(20)));
    EXPECT_EQ("{\n"
              "  if (something) /* This is a\n"
-            "long comment */\n"
+            "                    long\n"
+            "                    comment */\n"
              "    ;\n"
              "}",
              format("{\n"
@@ -4435,6 +4495,35 @@ TEST_F(FormatTest, ConfigurableUseOfTab) {
                 "\t\t    parameter2); \\\n"
                 "\t}",
                 Tab);
+  EXPECT_EQ("/*\n"
+            "\t      a\t\tcomment\n"
+            "\t      in multiple lines\n"
+            "       */",
+            format("   /*\t \t \n"
+                   " \t \t a\t\tcomment\t \t\n"
+                   " \t \t in multiple lines\t\n"
+                   " \t  */",
+                   Tab));
+  Tab.UseTab = false;
+  // FIXME: Change this test to a different tab size than
+  // 8 once configurable.
+  EXPECT_EQ("/*\n"
+            "              a\t\tcomment\n"
+            "              in multiple lines\n"
+            "       */",
+            format("   /*\t \t \n"
+                   " \t \t a\t\tcomment\t \t\n"
+                   " \t \t in multiple lines\t\n"
+                   " \t  */",
+                   Tab));
+
+  // FIXME: This is broken, as the spelling column number we
+  // get from the SourceManager counts tab as '1'.
+  // EXPECT_EQ("/* some\n"
+  //           "   comment */",
+  //          format(" \t \t /* some\n"
+  //                 " \t \t    comment */",
+  //                 Tab));
  }
  
  TEST_F(FormatTest, LinuxBraceBreaking) {
author	Manuel Klimek <klimek@google.com>
	Mon, 27 May 2013 15:23:34 +0000 (15:23 +0000)
committer	Manuel Klimek <klimek@google.com>
	Mon, 27 May 2013 15:23:34 +0000 (15:23 +0000)
lib/Format/BreakableToken.cpp		patch \| blob \| history
lib/Format/BreakableToken.h		patch \| blob \| history
lib/Format/Format.cpp		patch \| blob \| history
lib/Format/UnwrappedLineParser.h		patch \| blob \| history
lib/Format/WhitespaceManager.cpp		patch \| blob \| history
lib/Format/WhitespaceManager.h		patch \| blob \| history
unittests/Format/FormatTest.cpp		patch \| blob \| history