Unified token breaking logic for strings and block comments.

author Alexander Kornienko <alexfh@google.com>

Mon, 15 Apr 2013 14:28:00 +0000 (14:28 +0000)

committer Alexander Kornienko <alexfh@google.com>

Mon, 15 Apr 2013 14:28:00 +0000 (14:28 +0000)
author Alexander Kornienko <alexfh@google.com>
Mon, 15 Apr 2013 14:28:00 +0000 (14:28 +0000)
committer Alexander Kornienko <alexfh@google.com>
Mon, 15 Apr 2013 14:28:00 +0000 (14:28 +0000)
diff --git a/lib/Format/BreakableToken.cpp b/lib/Format/BreakableToken.cpp

new file mode 100644 (file)

index 0000000..4ec3de9
--- /dev/null
+++ b/lib/Format/BreakableToken.cpp
@@ -0,0 +1,161 @@
+//===--- BreakableToken.cpp - Format C++ code -----------------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// \brief Contains implementation of BreakableToken class and classes derived
+/// from it.
+///
+//===----------------------------------------------------------------------===//
+
+#include "BreakableToken.h"
+#include <algorithm>
+
+namespace clang {
+namespace format {
+
+/// \brief Splits the block comment \p Token into lines and derives the
+/// indentation and decoration to use when its lines are broken.
+///
+/// \param SourceMgr is used to read the token text and its spelling column.
+/// \param Token is the comment token; its text has to start with "/*" and end
+/// with "*/" (asserted below).
+/// \param StartColumn is stored and compared with the comment's original
+/// column; presumably the column the comment starts at after formatting.
+BreakableBlockComment::BreakableBlockComment(const SourceManager &SourceMgr,
+                                             const AnnotatedToken &Token,
+                                             unsigned StartColumn)
+    : Tok(Token.FormatTok), StartColumn(StartColumn) {
+
+  SourceLocation TokenLoc = Tok.Tok.getLocation();
+  TokenText = StringRef(SourceMgr.getCharacterData(TokenLoc), Tok.TokenLength);
+  assert(TokenText.startswith("/*") && TokenText.endswith("*/"));
+
+  // Column of the comment in the original source (assumes SourceManager
+  // reports 1-based columns, hence the - 1).
+  OriginalStartColumn = SourceMgr.getSpellingColumnNumber(TokenLoc) - 1;
+
+  // Drop the leading "/*" and the trailing "*/", then split the rest on "\n".
+  TokenText.substr(2, TokenText.size() - 4).split(Lines, "\n");
+
+  NeedsStar = true;
+  // UINT_MAX means "no prefix found yet"; it is replaced by 0 further down.
+  CommonPrefixLength = UINT_MAX;
+  if (Lines.size() == 1) {
+    if (Token.Parent == 0) {
+      // Standalone block comments will be aligned and prefixed with *s.
+      CommonPrefixLength = OriginalStartColumn + 1;
+    } else {
+      // Trailing comments can start on arbitrary column, and available
+      // horizontal space can be too small to align consecutive lines with
+      // the first one. We could, probably, align them to current
+      // indentation level, but now we just wrap them without indentation
+      // and stars.
+      CommonPrefixLength = 0;
+      NeedsStar = false;
+    }
+  } else {
+    // Only lines after the first are inspected. Lines that are empty or
+    // consist solely of spaces influence neither the prefix nor NeedsStar.
+    for (size_t i = 1; i < Lines.size(); ++i) {
+      size_t FirstNonWhitespace = Lines[i].find_first_not_of(" ");
+      if (FirstNonWhitespace != StringRef::npos) {
+        // Stars are only added if every inspected line already starts with one.
+        NeedsStar = NeedsStar && (Lines[i][FirstNonWhitespace] == '*');
+        CommonPrefixLength =
+            std::min<unsigned>(CommonPrefixLength, FirstNonWhitespace);
+      }
+    }
+  }
+  if (CommonPrefixLength == UINT_MAX)
+    CommonPrefixLength = 0;
+
+  // Shift the common prefix by the distance the comment moves and clamp at 0.
+  // The operands are unsigned: a move to the left wraps around and only
+  // becomes negative when std::max<int> converts the result to int.
+  IndentAtLineBreak =
+      std::max<int>(StartColumn - OriginalStartColumn + CommonPrefixLength, 0);
+}
+
+/// \brief Shifts the continuation lines of the comment by the distance the
+/// comment itself moves and strips the common prefix from \c Lines.
+///
+/// Lines after the first are moved right by inserting spaces in front of them,
+/// or left by removing leading spaces. Moving left is skipped for all lines if
+/// any of them has fewer leading spaces than would have to be removed.
+///
+/// The common prefix (plus two characters of star decoration if \c NeedsStar
+/// is set) is removed from \c Lines[1..] in every case, including the one
+/// where re-indentation is skipped. Otherwise getLine() would keep returning
+/// the prefix as part of the content and the line length computations would
+/// count it twice.
+///
+/// \param Whitespaces receives the replacements that change the indentation.
+void BreakableBlockComment::alignLines(WhitespaceManager &Whitespaces) {
+  SourceLocation TokenLoc = Tok.Tok.getLocation();
+  int IndentDelta = StartColumn - OriginalStartColumn;
+
+  if (IndentDelta < 0) {
+    // Check that every line is indented enough to be moved left. If one is
+    // not, leave the indentation of all lines alone.
+    std::string WhiteSpace(-IndentDelta, ' ');
+    for (size_t i = 1; i < Lines.size(); ++i) {
+      if (!Lines[i].startswith(WhiteSpace)) {
+        IndentDelta = 0;
+        break;
+      }
+    }
+  }
+
+  if (IndentDelta > 0) {
+    std::string WhiteSpace(IndentDelta, ' ');
+    for (size_t i = 1; i < Lines.size(); ++i) {
+      Whitespaces.addReplacement(
+          TokenLoc.getLocWithOffset(Lines[i].data() - TokenText.data()), 0,
+          WhiteSpace);
+    }
+  } else if (IndentDelta < 0) {
+    for (size_t i = 1; i < Lines.size(); ++i) {
+      Whitespaces.addReplacement(
+          TokenLoc.getLocWithOffset(Lines[i].data() - TokenText.data()),
+          -IndentDelta, "");
+    }
+  }
+
+  // From here on Lines[1..] hold the line content without prefix/decoration.
+  for (unsigned i = 1; i < Lines.size(); ++i)
+    Lines[i] = Lines[i].substr(CommonPrefixLength + (NeedsStar ? 2 : 0));
+}
+
+/// \brief Looks for a space at which the rest of line \p LineIndex, starting
+/// at \p TailOffset, can be broken to fit into \p ColumnLimit.
+///
+/// The space found by searching backwards from the column limit is preferred.
+/// If there is none, or only spaces precede it, the first space found from the
+/// limit onwards is used instead.
+///
+/// \returns the length of the text kept before the break and the number of
+/// whitespace characters between the two parts, or (StringRef::npos, 0) if the
+/// line cannot be split.
+BreakableToken::Split BreakableBlockComment::getSplit(unsigned LineIndex,
+                                                      unsigned TailOffset,
+                                                      unsigned ColumnLimit) {
+  const Split NoSplit(StringRef::npos, 0);
+  StringRef Remaining = getLine(LineIndex).substr(TailOffset);
+
+  // The very beginning of the comment is preceded by "/*", everything else by
+  // the regular line prefix.
+  unsigned Decoration;
+  if (LineIndex == 0 && TailOffset == 0)
+    Decoration = StartColumn + 2;
+  else
+    Decoration = getPrefixLength();
+  if (ColumnLimit <= Decoration + 1)
+    return NoSplit;
+
+  const unsigned Limit = ColumnLimit - Decoration + 1;
+  StringRef::size_type Cut = Remaining.rfind(' ', Limit);
+  const bool OnlySpacesBeforeCut =
+      Cut != StringRef::npos &&
+      Remaining.find_last_not_of(' ', Cut) == StringRef::npos;
+  if (Cut == StringRef::npos || OnlySpacesBeforeCut)
+    Cut = Remaining.find(' ', Limit);
+
+  if (Cut == StringRef::npos || Cut == 0)
+    return NoSplit;
+
+  StringRef Head = Remaining.substr(0, Cut).rtrim();
+  StringRef Rest = Remaining.substr(Cut).ltrim();
+  return BreakableToken::Split(Head.size(), Rest.begin() - Head.end());
+}
+
+/// \brief Passes the break described by \p Split for line \p LineIndex to
+/// \p Whitespaces.
+///
+/// \p Split is relative to the part of the line that starts at \p TailOffset:
+/// \c Split.first characters are kept and the following \c Split.second
+/// characters are handed to breakToken as the characters to replace.
+///
+/// If the split consumes the whole rest of the line, nothing is done for all
+/// but the last line; on the last line the break is still emitted, but without
+/// the "* " decoration.
+void BreakableBlockComment::insertBreak(unsigned LineIndex, unsigned TailOffset,
+                                        Split Split, bool InPPDirective,
+                                        WhitespaceManager &Whitespaces) {
+  StringRef Text = getLine(LineIndex).substr(TailOffset);
+  StringRef AdditionalPrefix = NeedsStar ? "* " : "";
+  if (Text.size() == Split.first + Split.second) {
+    // For all but the last line handle trailing space separately.
+    if (LineIndex < Lines.size() - 1)
+      return;
+    // For the last line we need to break before "*/", but not to add "* ".
+    AdditionalPrefix = "";
+  }
+
+  // Column at which the replaced whitespace starts: the kept text plus what
+  // precedes it ("/*" at the very beginning of the comment, the regular line
+  // prefix otherwise).
+  unsigned WhitespaceStartColumn =
+      Split.first +
+      (LineIndex == 0 && TailOffset == 0 ? StartColumn + 2 : getPrefixLength());
+  // Offset of the break, measured from the start of the token text.
+  unsigned BreakOffset = Text.data() - TokenText.data() + Split.first;
+  unsigned CharsToRemove = Split.second;
+  Whitespaces.breakToken(Tok, BreakOffset, CharsToRemove, "", AdditionalPrefix,
+                         InPPDirective, IndentAtLineBreak,
+                         WhitespaceStartColumn);
+}
+
+/// \brief Rewrites the end of line \p LineIndex without trailing whitespace.
+///
+/// The last line of the comment is never touched. For any other line the
+/// whitespace that follows the content starting at \p TailOffset is passed to
+/// \c WhitespaceManager::breakToken as the range to replace. Inside a
+/// preprocessor directive this is done even if the line has no trailing space.
+void BreakableBlockComment::trimLine(unsigned LineIndex, unsigned TailOffset,
+                                     unsigned InPPDirective,
+                                     WhitespaceManager &Whitespaces) {
+  const bool IsLastLine = LineIndex == Lines.size() - 1;
+  if (IsLastLine)
+    return;
+
+  StringRef Tail = Lines[LineIndex].substr(TailOffset);
+  const bool NeedsRewrite = Tail.endswith(" ") || InPPDirective;
+  if (!NeedsRewrite)
+    return;
+
+  StringRef Content = Tail.rtrim();
+  // The extra character presumably covers the newline that ends the line.
+  unsigned TrailingLength = Tail.size() - Content.size() + 1;
+  unsigned EndOfContent = Content.end() - TokenText.data();
+  unsigned ColumnAfterContent = getLineLengthAfterSplit(LineIndex, TailOffset);
+  Whitespaces.breakToken(Tok, EndOfContent, TrailingLength, "", "",
+                         InPPDirective, 0, ColumnAfterContent);
+}
+
+} // namespace format
+} // namespace clang
diff --git a/lib/Format/BreakableToken.h b/lib/Format/BreakableToken.h

new file mode 100644 (file)

index 0000000..0609104
--- /dev/null
+++ b/lib/Format/BreakableToken.h
@@ -0,0 +1,226 @@
+//===--- BreakableToken.h - Format C++ code -------------------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// \brief Declares BreakableToken, BreakableStringLiteral, and
+/// BreakableBlockComment classes, that contain token type-specific logic to
+/// break long lines in tokens.
+///
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CLANG_FORMAT_BREAKABLETOKEN_H
+#define LLVM_CLANG_FORMAT_BREAKABLETOKEN_H
+
+#include "TokenAnnotator.h"
+#include "WhitespaceManager.h"
+#include <utility>
+
+namespace clang {
+namespace format {
+
+/// \brief Interface for tokens whose text can be broken into several lines.
+///
+/// Implemented by BreakableStringLiteral and BreakableBlockComment.
+class BreakableToken {
+public:
+  virtual ~BreakableToken() {}
+  /// \brief Number of lines the token consists of.
+  virtual unsigned getLineCount() const = 0;
+  /// \brief Length of the content of line \p Index, without decoration such as
+  /// quotes or comment markers.
+  virtual unsigned getLineSize(unsigned Index) = 0;
+  /// \brief Length of line \p LineIndex if its content starts at
+  /// \p TailOffset, including what precedes and follows the content.
+  virtual unsigned getLineLengthAfterSplit(unsigned LineIndex,
+                                           unsigned TailOffset) = 0;
+  /// \brief Column at which line content starts, i.e. start column or
+  /// indentation plus the opening decoration.
+  virtual unsigned getPrefixLength() = 0;
+  /// \brief Length of the decoration that follows the content of line
+  /// \p LineIndex.
+  virtual unsigned getSuffixLength(unsigned LineIndex) = 0;
+
+  // Contains starting character index and length of split.
+  typedef std::pair<StringRef::size_type, unsigned> Split;
+  /// \brief Finds a position at which the part of line \p LineIndex starting
+  /// at \p TailOffset can be broken with respect to \p ColumnLimit. Both
+  /// implementations return Split(StringRef::npos, 0) if they find none.
+  virtual Split getSplit(unsigned LineIndex, unsigned TailOffset,
+                         unsigned ColumnLimit) = 0;
+  /// \brief Passes the break described by \p Split to \p Whitespaces.
+  virtual void insertBreak(unsigned LineIndex, unsigned TailOffset, Split Split,
+                           bool InPPDirective,
+                           WhitespaceManager &Whitespaces) = 0;
+  /// \brief Cleans up the end of line \p LineIndex. BreakableBlockComment
+  /// rewrites trailing whitespace, BreakableStringLiteral does nothing.
+  // NOTE(review): InPPDirective is declared unsigned here, but bool in
+  // insertBreak; it is only used as a flag.
+  virtual void trimLine(unsigned LineIndex, unsigned TailOffset,
+                        unsigned InPPDirective,
+                        WhitespaceManager &Whitespaces) = 0;
+};
+
+/// \brief BreakableToken implementation for string literals.
+///
+/// The literal is treated as a single line whose content is the token text
+/// without its first and last character (the quotes).
+class BreakableStringLiteral : public BreakableToken {
+public:
+  /// \param Tok is the string literal token; only a reference is kept.
+  /// \param StartColumn is the column used as base for all column computations
+  /// and as indentation for the parts after a break.
+  BreakableStringLiteral(const FormatToken &Tok, unsigned StartColumn)
+      : Tok(Tok), StartColumn(StartColumn) {}
+
+  virtual unsigned getLineCount() const { return 1; }
+
+  virtual unsigned getLineSize(unsigned Index) {
+    return Tok.TokenLength - 2; // Should be in sync with getLine
+  }
+
+  virtual unsigned getLineLengthAfterSplit(unsigned LineIndex,
+                                           unsigned TailOffset) {
+    return getPrefixLength() + getLine(LineIndex).size() - TailOffset +
+           getSuffixLength(LineIndex);
+  }
+
+  // Start column plus one character for the opening quote.
+  virtual unsigned getPrefixLength() { return StartColumn + 1; }
+
+  // One character for the closing quote.
+  virtual unsigned getSuffixLength(unsigned LineIndex) { return 1; }
+
+  /// \brief Finds a break position in the content starting at \p TailOffset.
+  ///
+  /// Tried in this order: after the space found by searching backwards from
+  /// the limit, after the '/' found the same way, and finally at the limit
+  /// itself, moved back to the start of an escape sequence if the limit falls
+  /// inside one. The returned split never removes characters.
+  ///
+  /// \returns Split(StringRef::npos, 0) if there is no room for content or no
+  /// acceptable position was found.
+  virtual Split getSplit(unsigned LineIndex, unsigned TailOffset,
+                         unsigned ColumnLimit) {
+    StringRef Text = getLine(LineIndex).substr(TailOffset);
+    unsigned DecorationLength = getPrefixLength() + getSuffixLength(0);
+    if (ColumnLimit <= DecorationLength)
+      return Split(StringRef::npos, 0);
+    unsigned MaxSplit = ColumnLimit - DecorationLength;
+    // Callers are expected to ask for a split only if the text is too long.
+    assert(MaxSplit < Text.size());
+    StringRef::size_type SpaceOffset = Text.rfind(' ', MaxSplit);
+    if (SpaceOffset != StringRef::npos && SpaceOffset != 0)
+      return Split(SpaceOffset + 1, 0);
+    StringRef::size_type SlashOffset = Text.rfind('/', MaxSplit);
+    if (SlashOffset != StringRef::npos && SlashOffset != 0)
+      return Split(SlashOffset + 1, 0);
+    StringRef::size_type SplitPoint = getStartOfCharacter(Text, MaxSplit);
+    // NOTE(review): the condition also rejects a split at 1, although the
+    // comment below only mentions 0 - confirm which one is intended.
+    if (SplitPoint != StringRef::npos && SplitPoint > 1)
+      // Do not split at 0.
+      return Split(SplitPoint, 0);
+    return Split(StringRef::npos, 0);
+  }
+
+  /// \brief Passes the break to \p Whitespaces with a '"' on either side of
+  /// it, so that both parts are string literals of their own.
+  virtual void insertBreak(unsigned LineIndex, unsigned TailOffset, Split Split,
+                           bool InPPDirective, WhitespaceManager &Whitespaces) {
+    // + 2: presumably the opening quote plus the quote added before the break.
+    unsigned WhitespaceStartColumn = StartColumn + Split.first + 2;
+    // + 1 skips the opening quote, as the offset is relative to the token.
+    Whitespaces.breakToken(Tok, TailOffset + Split.first + 1, Split.second,
+                           "\"", "\"", InPPDirective, StartColumn,
+                           WhitespaceStartColumn);
+  }
+
+  // Nothing to trim in a string literal.
+  virtual void trimLine(unsigned LineIndex, unsigned TailOffset,
+                        unsigned InPPDirective,
+                        WhitespaceManager &Whitespaces) {}
+
+private:
+  StringRef getLine(unsigned Index) {
+    // Get string without quotes.
+    // FIXME: Handle string prefixes.
+    return StringRef(Tok.Tok.getLiteralData() + 1, Tok.TokenLength - 2);
+  }
+
+  /// \brief Returns \p Offset, or the position of the backslash of the escape
+  /// sequence \p Offset falls into.
+  ///
+  /// Escape sequences are walked from the start of \p Text, so that an escaped
+  /// backslash is not mistaken for the start of a sequence.
+  static StringRef::size_type getStartOfCharacter(StringRef Text,
+                                                  StringRef::size_type Offset) {
+    StringRef::size_type NextEscape = Text.find('\\');
+    while (NextEscape != StringRef::npos && NextEscape < Offset) {
+      StringRef::size_type SequenceLength =
+          getEscapeSequenceLength(Text.substr(NextEscape));
+      if (Offset < NextEscape + SequenceLength)
+        return NextEscape;
+      NextEscape = Text.find('\\', NextEscape + SequenceLength);
+    }
+    return Offset;
+  }
+
+  /// \brief Length of the escape sequence at the start of \p Text, including
+  /// the backslash. \p Text has to start with a backslash.
+  ///
+  /// A lone backslash at the end of the text counts as 1. The fixed lengths
+  /// for \\u and \\U are returned without checking how much text follows.
+  static unsigned getEscapeSequenceLength(StringRef Text) {
+    assert(Text[0] == '\\');
+    if (Text.size() < 2)
+      return 1;
+
+    switch (Text[1]) {
+    case 'u':
+      return 6;
+    case 'U':
+      return 10;
+    case 'x':
+      return getHexLength(Text);
+    default:
+      if (Text[1] >= '0' && Text[1] <= '7')
+        return getOctalLength(Text);
+      return 2;
+    }
+  }
+
+  /// \brief Length of a "\x" escape at the start of \p Text: the two leading
+  /// characters plus all hexadecimal digits that follow.
+  static unsigned getHexLength(StringRef Text) {
+    unsigned I = 2; // Point after '\x'.
+    while (I < Text.size() && ((Text[I] >= '0' && Text[I] <= '9') ||
+                               (Text[I] >= 'a' && Text[I] <= 'f') ||
+                               (Text[I] >= 'A' && Text[I] <= 'F'))) {
+      ++I;
+    }
+    return I;
+  }
+
+  /// \brief Length of an octal escape at the start of \p Text: the backslash
+  /// plus at most three octal digits.
+  static unsigned getOctalLength(StringRef Text) {
+    unsigned I = 1;
+    while (I < Text.size() && I < 4 && (Text[I] >= '0' && Text[I] <= '7')) {
+      ++I;
+    }
+    return I;
+  }
+
+  const FormatToken &Tok;
+  unsigned StartColumn;
+};
+
+/// \brief BreakableToken implementation for block comments.
+///
+/// The comment text between "/*" and "*/" is split into lines on construction.
+class BreakableBlockComment : public BreakableToken {
+public:
+  BreakableBlockComment(const SourceManager &SourceMgr,
+                        const AnnotatedToken &Token, unsigned StartColumn);
+
+  /// \brief Adds replacements that re-indent the lines after the first one and
+  /// strips the common prefix from the stored lines.
+  void alignLines(WhitespaceManager &Whitespaces);
+
+  virtual unsigned getLineCount() const { return Lines.size(); }
+
+  virtual unsigned getLineSize(unsigned Index) {
+    return getLine(Index).size();
+  }
+
+  virtual unsigned getLineLengthAfterSplit(unsigned LineIndex,
+                                           unsigned TailOffset) {
+    unsigned ContentStartColumn = getPrefixLength();
+    // The untouched first line starts right after "/*" instead of after the
+    // regular line prefix.
+    if (TailOffset == 0 && LineIndex == 0)
+      ContentStartColumn = StartColumn + 2;
+    return ContentStartColumn + getLine(LineIndex).size() - TailOffset +
+           getSuffixLength(LineIndex);
+  }
+
+  // Indentation of a broken line plus two characters of star decoration, if
+  // stars are used.
+  virtual unsigned getPrefixLength() {
+    return IndentAtLineBreak + (NeedsStar ? 2 : 0);
+  }
+
+  // Only the last line has a suffix: the two characters of "*/".
+  virtual unsigned getSuffixLength(unsigned LineIndex) {
+    if (LineIndex + 1 < Lines.size())
+      return 0;
+    return 2;
+  }
+
+  virtual Split getSplit(unsigned LineIndex, unsigned TailOffset,
+                         unsigned ColumnLimit);
+
+  virtual void insertBreak(unsigned LineIndex, unsigned TailOffset, Split Split,
+                           bool InPPDirective, WhitespaceManager &Whitespaces);
+
+  virtual void trimLine(unsigned LineIndex, unsigned TailOffset,
+                        unsigned InPPDirective, WhitespaceManager &Whitespaces);
+
+private:
+  // Get comment lines without /* */, common prefix and trailing whitespace.
+  // Last line is not trimmed, as it is terminated by */, so its trailing
+  // whitespace is not really trailing.
+  // NOTE(review): the common prefix is only removed from Lines by
+  // alignLines(); before that call the lines still contain it.
+  StringRef getLine(unsigned Index) {
+    return Index < Lines.size() - 1 ? Lines[Index].rtrim() : Lines[Index];
+  }
+
+  // The comment token; only a reference is kept.
+  const FormatToken &Tok;
+  // Column passed to the constructor; compared with OriginalStartColumn to
+  // find out how far the comment moves.
+  const unsigned StartColumn;
+  // Text of the whole token, including "/*" and "*/".
+  StringRef TokenText;
+  // Spelling column of the token in the source, minus one.
+  unsigned OriginalStartColumn;
+  // Smallest number of leading spaces among the lines after the first one
+  // (set differently for single-line comments, see the constructor).
+  unsigned CommonPrefixLength;
+  // Indentation passed to breakToken for lines created by insertBreak.
+  unsigned IndentAtLineBreak;
+  // Whether new lines are decorated with "* ".
+  bool NeedsStar;
+  // Lines of the comment text between "/*" and "*/"; they refer into TokenText.
+  SmallVector<StringRef, 16> Lines;
+};
+
+} // namespace format
+} // namespace clang
+
+#endif // LLVM_CLANG_FORMAT_BREAKABLETOKEN_H
diff --git a/lib/Format/CMakeLists.txt b/lib/Format/CMakeLists.txt

index d8630eeeead3cd828ad30a06ebbfcfd055994fd3..560e38b4bfaa4694c31d799335d13455e2c876d2 100644 (file)
--- a/lib/Format/CMakeLists.txt
+++ b/lib/Format/CMakeLists.txt
@@ -1,9 +1,11 @@
  set(LLVM_LINK_COMPONENTS support)
  
  add_clang_library(clangFormat
+  BreakableToken.cpp
+  Format.cpp
    TokenAnnotator.cpp
    UnwrappedLineParser.cpp
-  Format.cpp
+  WhitespaceManager.cpp
    )
  
  add_dependencies(clangFormat
diff --git a/lib/Format/Format.cpp b/lib/Format/Format.cpp

index d0dfdceddc0d0397f12ba842819fd6ce2e183d3d..f93509e18c31976023d2759c1e55243dc57c05e6 100644 (file)
--- a/lib/Format/Format.cpp
+++ b/lib/Format/Format.cpp
@@ -15,8 +15,10 @@
  
  #define DEBUG_TYPE "format-formatter"
  
+#include "BreakableToken.h"
  #include "TokenAnnotator.h"
  #include "UnwrappedLineParser.h"
+#include "WhitespaceManager.h"
  #include "clang/Basic/Diagnostic.h"
  #include "clang/Basic/OperatorPrecedence.h"
  #include "clang/Basic/SourceManager.h"
@@ -93,367 +95,6 @@ static unsigned getLengthToMatchingParen(const AnnotatedToken &Tok) {
    return End->TotalLength - Tok.TotalLength + 1;
  }
  
-static size_t
-calculateColumnLimit(const FormatStyle &Style, bool InPPDirective) {
-  // In preprocessor directives reserve two chars for trailing " \"
-  return Style.ColumnLimit - (InPPDirective ? 2 : 0);
-}
-
-/// \brief Manages the whitespaces around tokens and their replacements.
-///
-/// This includes special handling for certain constructs, e.g. the alignment of
-/// trailing line comments.
-class WhitespaceManager {
-public:
-  WhitespaceManager(SourceManager &SourceMgr, const FormatStyle &Style)
-      : SourceMgr(SourceMgr), Style(Style) {}
-
-  /// \brief Replaces the whitespace in front of \p Tok. Only call once for
-  /// each \c AnnotatedToken.
-  void replaceWhitespace(const AnnotatedToken &Tok, unsigned NewLines,
-                         unsigned Spaces, unsigned WhitespaceStartColumn) {
-    // 2+ newlines mean an empty line separating logic scopes.
-    if (NewLines >= 2)
-      alignComments();
-
-    SourceLocation TokenLoc = Tok.FormatTok.Tok.getLocation();
-    bool LineExceedsColumnLimit = Spaces + WhitespaceStartColumn +
-                                  Tok.FormatTok.TokenLength > Style.ColumnLimit;
-
-    // Align line comments if they are trailing or if they continue other
-    // trailing comments.
-    if (Tok.isTrailingComment()) {
-      // Remove the comment's trailing whitespace.
-      if (Tok.FormatTok.Tok.getLength() != Tok.FormatTok.TokenLength)
-        Replaces.insert(tooling::Replacement(
-            SourceMgr, TokenLoc.getLocWithOffset(Tok.FormatTok.TokenLength),
-            Tok.FormatTok.Tok.getLength() - Tok.FormatTok.TokenLength, ""));
-
-      // Align comment with other comments.
-      if ((Tok.Parent != NULL || !Comments.empty()) &&
-          !LineExceedsColumnLimit) {
-        StoredComment Comment;
-        Comment.Tok = Tok.FormatTok;
-        Comment.Spaces = Spaces;
-        Comment.NewLines = NewLines;
-        Comment.MinColumn =
-            NewLines > 0 ? Spaces : WhitespaceStartColumn + Spaces;
-        Comment.MaxColumn = Style.ColumnLimit - Tok.FormatTok.TokenLength;
-        Comment.Untouchable = false;
-        Comments.push_back(Comment);
-        return;
-      }
-    }
-
-    // If this line does not have a trailing comment, align the stored comments.
-    if (Tok.Children.empty() && !Tok.isTrailingComment())
-      alignComments();
-
-    if (Tok.Type == TT_BlockComment) {
-      indentBlockComment(Tok, Spaces, WhitespaceStartColumn, NewLines, false);
-    } else if (Tok.Type == TT_LineComment && LineExceedsColumnLimit) {
-      StringRef Line(SourceMgr.getCharacterData(TokenLoc),
-                     Tok.FormatTok.TokenLength);
-      int StartColumn = Spaces + (NewLines == 0 ? WhitespaceStartColumn : 0);
-      StringRef Prefix = getLineCommentPrefix(Line);
-      std::string NewPrefix = std::string(StartColumn, ' ') + Prefix.str();
-      splitLineInComment(Tok.FormatTok, Line.substr(Prefix.size()),
-                         StartColumn + Prefix.size(), NewPrefix,
-                         /*InPPDirective=*/ false,
-                         /*CommentHasMoreLines=*/ false);
-    }
-
-    storeReplacement(Tok.FormatTok, getNewLineText(NewLines, Spaces));
-  }
-
-  /// \brief Like \c replaceWhitespace, but additionally adds right-aligned
-  /// backslashes to escape newlines inside a preprocessor directive.
-  ///
-  /// This function and \c replaceWhitespace have the same behavior if
-  /// \c Newlines == 0.
-  void replacePPWhitespace(const AnnotatedToken &Tok, unsigned NewLines,
-                           unsigned Spaces, unsigned WhitespaceStartColumn) {
-    if (Tok.Type == TT_BlockComment)
-      indentBlockComment(Tok, Spaces, WhitespaceStartColumn, NewLines, true);
-
-    storeReplacement(Tok.FormatTok,
-                     getNewLineText(NewLines, Spaces, WhitespaceStartColumn));
-  }
-
-  /// \brief Inserts a line break into the middle of a token.
-  ///
-  /// Will break at \p Offset inside \p Tok, putting \p Prefix before the line
-  /// break and \p Postfix before the rest of the token starts in the next line.
-  ///
-  /// \p InPPDirective, \p Spaces, \p WhitespaceStartColumn and \p Style are
-  /// used to generate the correct line break.
-  void breakToken(const FormatToken &Tok, unsigned Offset,
-                  unsigned ReplaceChars, StringRef Prefix, StringRef Postfix,
-                  bool InPPDirective, unsigned Spaces,
-                  unsigned WhitespaceStartColumn) {
-    std::string NewLineText;
-    if (!InPPDirective)
-      NewLineText = getNewLineText(1, Spaces);
-    else
-      NewLineText = getNewLineText(1, Spaces, WhitespaceStartColumn);
-    std::string ReplacementText = (Prefix + NewLineText + Postfix).str();
-    SourceLocation Location = Tok.Tok.getLocation().getLocWithOffset(Offset);
-    Replaces.insert(tooling::Replacement(SourceMgr, Location, ReplaceChars,
-                                         ReplacementText));
-  }
-
-  /// \brief Returns all the \c Replacements created during formatting.
-  const tooling::Replacements &generateReplacements() {
-    alignComments();
-    return Replaces;
-  }
-
-  void addUntouchableComment(unsigned Column) {
-    StoredComment Comment;
-    Comment.MinColumn = Column;
-    Comment.MaxColumn = Column;
-    Comment.Untouchable = true;
-    Comments.push_back(Comment);
-  }
-
-private:
-  static StringRef getLineCommentPrefix(StringRef Comment) {
-    const char *KnownPrefixes[] = { "/// ", "///", "// ", "//" };
-    for (size_t i = 0; i < llvm::array_lengthof(KnownPrefixes); ++i)
-      if (Comment.startswith(KnownPrefixes[i]))
-        return KnownPrefixes[i];
-    return "";
-  }
-
-  /// \brief Finds a common prefix of lines of a block comment to properly
-  /// indent (and possibly decorate with '*'s) added lines.
-  ///
-  /// The first line is ignored (it's special and starts with /*). The number of
-  /// lines should be more than one.
-  static StringRef findCommentLinesPrefix(ArrayRef<StringRef> Lines,
-                                          const char *PrefixChars = " *") {
-    assert(Lines.size() > 1);
-    StringRef Prefix(Lines[1].data(), Lines[1].find_first_not_of(PrefixChars));
-    for (size_t i = 2; i < Lines.size(); ++i) {
-      for (size_t j = 0; j < Prefix.size() && j < Lines[i].size(); ++j) {
-        if (Prefix[j] != Lines[i][j]) {
-          Prefix = Prefix.substr(0, j);
-          break;
-        }
-      }
-    }
-    return Prefix;
-  }
-
-  /// \brief Splits one line in a line or block comment, if it doesn't fit to
-  /// provided column limit. Removes trailing whitespace in each line.
-  ///
-  /// \param Line points to the line contents without leading // or /*.
-  ///
-  /// \param StartColumn is the column where the first character of Line will be
-  /// located after formatting.
-  ///
-  /// \param LinePrefix is inserted after each line break.
-  ///
-  /// When \param InPPDirective is true, each line break will be preceded by a
-  /// backslash in the last column to make line breaks inside the comment
-  /// visually consistent with line breaks outside the comment. This only makes
-  /// sense for block comments.
-  ///
-  /// When \param CommentHasMoreLines is false, no line breaks/trailing
-  /// backslashes will be inserted after it.
-  void splitLineInComment(const FormatToken &Tok, StringRef Line,
-                          size_t StartColumn, StringRef LinePrefix,
-                          bool InPPDirective, bool CommentHasMoreLines,
-                          const char *WhiteSpaceChars = " ") {
-    size_t ColumnLimit = calculateColumnLimit(Style, InPPDirective);
-    const char *TokenStart = SourceMgr.getCharacterData(Tok.Tok.getLocation());
-
-    StringRef TrimmedLine = Line.rtrim();
-    int TrailingSpaceLength = Line.size() - TrimmedLine.size();
-
-    // Don't touch leading whitespace.
-    Line = TrimmedLine.ltrim();
-    StartColumn += TrimmedLine.size() - Line.size();
-
-    while (Line.size() + StartColumn > ColumnLimit) {
-      // Try to break at the last whitespace before the column limit.
-      size_t SpacePos =
-          Line.find_last_of(WhiteSpaceChars, ColumnLimit - StartColumn + 1);
-      if (SpacePos == StringRef::npos) {
-        // Try to find any whitespace in the line.
-        SpacePos = Line.find_first_of(WhiteSpaceChars);
-        if (SpacePos == StringRef::npos) // No whitespace found, give up.
-          break;
-      }
-
-      StringRef NextCut = Line.substr(0, SpacePos).rtrim();
-      StringRef RemainingLine = Line.substr(SpacePos).ltrim();
-      if (RemainingLine.empty())
-        break;
-
-      if (RemainingLine == "*/" && LinePrefix.endswith("* "))
-        LinePrefix = LinePrefix.substr(0, LinePrefix.size() - 2);
-
-      Line = RemainingLine;
-
-      size_t ReplaceChars = Line.begin() - NextCut.end();
-      breakToken(Tok, NextCut.end() - TokenStart, ReplaceChars, "", LinePrefix,
-                 InPPDirective, 0, NextCut.size() + StartColumn);
-      StartColumn = LinePrefix.size();
-    }
-
-    if (TrailingSpaceLength > 0 || (InPPDirective && CommentHasMoreLines)) {
-      // Remove trailing whitespace/insert backslash. + 1 is for \n
-      breakToken(Tok, Line.end() - TokenStart, TrailingSpaceLength + 1, "", "",
-                 InPPDirective, 0, Line.size() + StartColumn);
-    }
-  }
-
-  /// \brief Changes indentation of all lines in a block comment by Indent,
-  /// removes trailing whitespace from each line, splits lines that end up
-  /// exceeding the column limit.
-  void indentBlockComment(const AnnotatedToken &Tok, int Indent,
-                          int WhitespaceStartColumn, int NewLines,
-                          bool InPPDirective) {
-    assert(Tok.Type == TT_BlockComment);
-    int StartColumn = Indent + (NewLines == 0 ? WhitespaceStartColumn : 0);
-    const SourceLocation TokenLoc = Tok.FormatTok.Tok.getLocation();
-    const int CurrentIndent = SourceMgr.getSpellingColumnNumber(TokenLoc) - 1;
-    const int IndentDelta = Indent - CurrentIndent;
-    const StringRef Text(SourceMgr.getCharacterData(TokenLoc),
-                         Tok.FormatTok.TokenLength);
-    assert(Text.startswith("/*") && Text.endswith("*/"));
-
-    SmallVector<StringRef, 16> Lines;
-    Text.split(Lines, "\n");
-
-    if (IndentDelta > 0) {
-      std::string WhiteSpace(IndentDelta, ' ');
-      for (size_t i = 1; i < Lines.size(); ++i) {
-        Replaces.insert(tooling::Replacement(
-            SourceMgr, TokenLoc.getLocWithOffset(Lines[i].data() - Text.data()),
-            0, WhiteSpace));
-      }
-    } else if (IndentDelta < 0) {
-      std::string WhiteSpace(-IndentDelta, ' ');
-      // Check that the line is indented enough.
-      for (size_t i = 1; i < Lines.size(); ++i) {
-        if (!Lines[i].startswith(WhiteSpace))
-          return;
-      }
-      for (size_t i = 1; i < Lines.size(); ++i) {
-        Replaces.insert(tooling::Replacement(
-            SourceMgr, TokenLoc.getLocWithOffset(Lines[i].data() - Text.data()),
-            -IndentDelta, ""));
-      }
-    }
-
-    // Split long lines in comments.
-    size_t OldPrefixSize = 0;
-    std::string NewPrefix;
-    if (Lines.size() > 1) {
-      StringRef CurrentPrefix = findCommentLinesPrefix(Lines);
-      OldPrefixSize = CurrentPrefix.size();
-      NewPrefix = (IndentDelta < 0)
-                  ? CurrentPrefix.substr(-IndentDelta).str()
-                  : std::string(IndentDelta, ' ') + CurrentPrefix.str();
-      if (CurrentPrefix.endswith("*")) {
-        NewPrefix += " ";
-        ++OldPrefixSize;
-      }
-    } else if (Tok.Parent == 0) {
-      NewPrefix = std::string(StartColumn, ' ') + " * ";
-    }
-
-    StartColumn += 2;
-    for (size_t i = 0; i < Lines.size(); ++i) {
-      StringRef Line = Lines[i].substr(i == 0 ? 2 : OldPrefixSize);
-      splitLineInComment(Tok.FormatTok, Line, StartColumn, NewPrefix,
-                         InPPDirective, i != Lines.size() - 1);
-      StartColumn = NewPrefix.size();
-    }
-  }
-
-  std::string getNewLineText(unsigned NewLines, unsigned Spaces) {
-    return std::string(NewLines, '\n') + std::string(Spaces, ' ');
-  }
-
-  std::string getNewLineText(unsigned NewLines, unsigned Spaces,
-                             unsigned WhitespaceStartColumn) {
-    std::string NewLineText;
-    if (NewLines > 0) {
-      unsigned Offset =
-          std::min<int>(Style.ColumnLimit - 1, WhitespaceStartColumn);
-      for (unsigned i = 0; i < NewLines; ++i) {
-        NewLineText += std::string(Style.ColumnLimit - Offset - 1, ' ');
-        NewLineText += "\\\n";
-        Offset = 0;
-      }
-    }
-    return NewLineText + std::string(Spaces, ' ');
-  }
-
-  /// \brief Structure to store a comment for later layout and alignment.
-  struct StoredComment {
-    FormatToken Tok;
-    unsigned MinColumn;
-    unsigned MaxColumn;
-    unsigned NewLines;
-    unsigned Spaces;
-    bool Untouchable;
-  };
-  SmallVector<StoredComment, 16> Comments;
-  typedef SmallVector<StoredComment, 16>::iterator comment_iterator;
-
-  /// \brief Try to align all stashed comments.
-  void alignComments() {
-    unsigned MinColumn = 0;
-    unsigned MaxColumn = UINT_MAX;
-    comment_iterator Start = Comments.begin();
-    for (comment_iterator I = Start, E = Comments.end(); I != E; ++I) {
-      if (I->MinColumn > MaxColumn || I->MaxColumn < MinColumn) {
-        alignComments(Start, I, MinColumn);
-        MinColumn = I->MinColumn;
-        MaxColumn = I->MaxColumn;
-        Start = I;
-      } else {
-        MinColumn = std::max(MinColumn, I->MinColumn);
-        MaxColumn = std::min(MaxColumn, I->MaxColumn);
-      }
-    }
-    alignComments(Start, Comments.end(), MinColumn);
-    Comments.clear();
-  }
-
-  /// \brief Put all the comments between \p I and \p E into \p Column.
-  void alignComments(comment_iterator I, comment_iterator E, unsigned Column) {
-    while (I != E) {
-      if (!I->Untouchable) {
-        unsigned Spaces = I->Spaces + Column - I->MinColumn;
-        storeReplacement(I->Tok, getNewLineText(I->NewLines, Spaces));
-      }
-      ++I;
-    }
-  }
-
-  /// \brief Stores \p Text as the replacement for the whitespace in front of
-  /// \p Tok.
-  void storeReplacement(const FormatToken &Tok, const std::string Text) {
-    // Don't create a replacement, if it does not change anything.
-    if (StringRef(SourceMgr.getCharacterData(Tok.WhiteSpaceStart),
-                  Tok.WhiteSpaceLength) == Text)
-      return;
-
-    Replaces.insert(tooling::Replacement(SourceMgr, Tok.WhiteSpaceStart,
-                                         Tok.WhiteSpaceLength, Text));
-  }
-
-  SourceManager &SourceMgr;
-  tooling::Replacements Replaces;
-  const FormatStyle &Style;
-};
-
  class UnwrappedLineFormatter {
  public:
    UnwrappedLineFormatter(const FormatStyle &Style, SourceManager &SourceMgr,
@@ -602,7 +243,7 @@ private:
        if (StartOfFunctionCall != Other.StartOfFunctionCall)
          return StartOfFunctionCall < Other.StartOfFunctionCall;
        if (NestedNameSpecifierContinuation !=
-              Other.NestedNameSpecifierContinuation)
+          Other.NestedNameSpecifierContinuation)
          return NestedNameSpecifierContinuation <
                 Other.NestedNameSpecifierContinuation;
        if (CallContinuation != Other.CallContinuation)
@@ -647,7 +288,7 @@ private:
        if (Column != Other.Column)
          return Column < Other.Column;
        if (LineContainsContinuedForLoopSection !=
-              Other.LineContainsContinuedForLoopSection)
+          Other.LineContainsContinuedForLoopSection)
          return LineContainsContinuedForLoopSection;
        if (ParenLevel != Other.ParenLevel)
          return ParenLevel < Other.ParenLevel;
@@ -806,7 +447,7 @@ private:
        if (Current.Type == TT_ObjCSelectorName &&
            State.Stack.back().ColonPos == 0) {
          if (State.Stack.back().Indent + Current.LongestObjCSelectorName >
-                State.Column + Spaces + Current.FormatTok.TokenLength)
+            State.Column + Spaces + Current.FormatTok.TokenLength)
            State.Stack.back().ColonPos =
                State.Stack.back().Indent + Current.LongestObjCSelectorName;
          else
@@ -970,115 +611,78 @@ private:
    /// it if possible.
    unsigned breakProtrudingToken(const AnnotatedToken &Current, LineState &State,
                                  bool DryRun) {
-    if (Current.isNot(tok::string_literal))
+    llvm::OwningPtr<BreakableToken> Token;
+    unsigned StartColumn = State.Column - Current.FormatTok.TokenLength;
+    if (Current.is(tok::string_literal)) {
+      // Only break up default narrow strings.
+      const char *LiteralData = Current.FormatTok.Tok.getLiteralData();
+      if (!LiteralData || *LiteralData != '"')
+        return 0;
+
+      Token.reset(new BreakableStringLiteral(Current.FormatTok, StartColumn));
+    } else if (Current.Type == TT_BlockComment) {
+      BreakableBlockComment *BBC =
+          new BreakableBlockComment(SourceMgr, Current, StartColumn);
+      if (!DryRun)
+        BBC->alignLines(Whitespaces);
+      Token.reset(BBC);
+    } else {
        return 0;
-    // Only break up default narrow strings.
-    const char *LiteralData = Current.FormatTok.Tok.getLiteralData();
-    if (!LiteralData || *LiteralData != '"')
+    }
+
+    if (Token->getPrefixLength() + Token->getSuffixLength(0) >
+        getColumnLimit()) {
        return 0;
+    }
  
+    bool BreakInserted = false;
      unsigned Penalty = 0;
-    unsigned TailOffset = 0;
-    unsigned TailLength = Current.FormatTok.TokenLength;
-    unsigned StartColumn = State.Column - Current.FormatTok.TokenLength;
-    unsigned OffsetFromStart = 0;
-    while (StartColumn + TailLength > getColumnLimit()) {
-      StringRef Text = StringRef(LiteralData + TailOffset, TailLength);
-      if (StartColumn + OffsetFromStart + 1 > getColumnLimit())
-        break;
-      StringRef::size_type SplitPoint = getSplitPoint(
-          Text, getColumnLimit() - StartColumn - OffsetFromStart - 1);
-      if (SplitPoint == StringRef::npos)
-        break;
-      assert(SplitPoint != 0);
-      // +2, because 'Text' starts after the opening quotes, and does not
-      // include the closing quote we need to insert.
-      unsigned WhitespaceStartColumn =
-          StartColumn + OffsetFromStart + SplitPoint + 2;
-      State.Stack.back().LastSpace = StartColumn;
+    for (unsigned LineIndex = 0; LineIndex < Token->getLineCount();
+         ++LineIndex) {
+      unsigned TokenLineSize = Token->getLineSize(LineIndex);
+      unsigned TailOffset = 0;
+      unsigned RemainingLength =
+          Token->getLineLengthAfterSplit(LineIndex, TailOffset);
+      while (RemainingLength > getColumnLimit()) {
+        unsigned DecorationLength =
+            RemainingLength - (TokenLineSize - TailOffset);
+        if (DecorationLength + 1 > getColumnLimit()) {
+          // Can't reduce line length by splitting here.
+          break;
+        }
+        BreakableToken::Split Split =
+            Token->getSplit(LineIndex, TailOffset, getColumnLimit());
+        if (Split.first == StringRef::npos)
+          break;
+        assert(Split.first != 0);
+        if (!DryRun) {
+          Token->insertBreak(LineIndex, TailOffset, Split, Line.InPPDirective,
+                             Whitespaces);
+        }
+        TailOffset += Split.first + Split.second;
+        unsigned OldRemainingLength = RemainingLength;
+        RemainingLength = Token->getLineLengthAfterSplit(LineIndex, TailOffset);
+        assert(RemainingLength < OldRemainingLength);
+        Penalty += Style.PenaltyExcessCharacter;
+        BreakInserted = true;
+      }
+      State.Column = RemainingLength;
        if (!DryRun) {
-        Whitespaces.breakToken(Current.FormatTok, TailOffset + SplitPoint + 1,
-                               0, "\"", "\"", Line.InPPDirective, StartColumn,
-                               WhitespaceStartColumn);
+        Token->trimLine(LineIndex, TailOffset, Line.InPPDirective, Whitespaces);
        }
-      TailOffset += SplitPoint + 1;
-      TailLength -= SplitPoint + 1;
-      OffsetFromStart = 1;
-      Penalty += Style.PenaltyExcessCharacter;
+    }
+
+    if (BreakInserted) {
        for (unsigned i = 0, e = State.Stack.size(); i != e; ++i)
          State.Stack[i].BreakBeforeParameter = true;
+      State.Stack.back().LastSpace = StartColumn;
      }
-    State.Column = StartColumn + TailLength;
      return Penalty;
    }
  
-  StringRef::size_type
-  getSplitPoint(StringRef Text, StringRef::size_type Offset) {
-    StringRef::size_type SpaceOffset = Text.rfind(' ', Offset);
-    if (SpaceOffset != StringRef::npos && SpaceOffset != 0)
-      return SpaceOffset;
-    StringRef::size_type SlashOffset = Text.rfind('/', Offset);
-    if (SlashOffset != StringRef::npos && SlashOffset != 0)
-      return SlashOffset;
-    StringRef::size_type Split = getStartOfCharacter(Text, Offset);
-    if (Split != StringRef::npos && Split > 1)
-      // Do not split at 0.
-      return Split - 1;
-    return StringRef::npos;
-  }
-
-  StringRef::size_type
-  getStartOfCharacter(StringRef Text, StringRef::size_type Offset) {
-    StringRef::size_type NextEscape = Text.find('\\');
-    while (NextEscape != StringRef::npos && NextEscape < Offset) {
-      StringRef::size_type SequenceLength =
-          getEscapeSequenceLength(Text.substr(NextEscape));
-      if (Offset < NextEscape + SequenceLength)
-        return NextEscape;
-      NextEscape = Text.find('\\', NextEscape + SequenceLength);
-    }
-    return Offset;
-  }
-
-  unsigned getEscapeSequenceLength(StringRef Text) {
-    assert(Text[0] == '\\');
-    if (Text.size() < 2)
-      return 1;
-
-    switch (Text[1]) {
-    case 'u':
-      return 6;
-    case 'U':
-      return 10;
-    case 'x':
-      return getHexLength(Text);
-    default:
-      if (Text[1] >= '0' && Text[1] <= '7')
-        return getOctalLength(Text);
-      return 2;
-    }
-  }
-
-  unsigned getHexLength(StringRef Text) {
-    unsigned I = 2; // Point after '\x'.
-    while (I < Text.size() && ((Text[I] >= '0' && Text[I] <= '9') ||
-                               (Text[I] >= 'a' && Text[I] <= 'f') ||
-                               (Text[I] >= 'A' && Text[I] <= 'F'))) {
-      ++I;
-    }
-    return I;
-  }
-
-  unsigned getOctalLength(StringRef Text) {
-    unsigned I = 1;
-    while (I < Text.size() && I < 4 && (Text[I] >= '0' && Text[I] <= '7')) {
-      ++I;
-    }
-    return I;
-  }
-
    unsigned getColumnLimit() {
-    return calculateColumnLimit(Style, Line.InPPDirective);
+    // In preprocessor directives reserve two chars for trailing " \"
+    return Style.ColumnLimit - (Line.InPPDirective ? 2 : 0);
    }
  
    /// \brief An edge in the solution space from \c Previous->State to \c State,
@@ -1401,9 +1005,9 @@ public:
            AnnotatedLines[i].First.Children.empty())
          AnnotatedLines[i].Level = NextNoneCommentLine->Level;
        else
-        NextNoneCommentLine = AnnotatedLines[i].First.isNot(tok::r_brace)
-                                  ? &AnnotatedLines[i]
-                                  : NULL;
+        NextNoneCommentLine =
+            AnnotatedLines[i].First.isNot(tok::r_brace) ? &AnnotatedLines[i]
+                                                        : NULL;
      }
  
      std::vector<int> IndentForLevel;
@@ -1746,9 +1350,10 @@ private:
    std::vector<AnnotatedLine> AnnotatedLines;
  };
  
-tooling::Replacements
-reformat(const FormatStyle &Style, Lexer &Lex, SourceManager &SourceMgr,
-         std::vector<CharSourceRange> Ranges, DiagnosticConsumer *DiagClient) {
+tooling::Replacements reformat(const FormatStyle &Style, Lexer &Lex,
+                               SourceManager &SourceMgr,
+                               std::vector<CharSourceRange> Ranges,
+                               DiagnosticConsumer *DiagClient) {
    IntrusiveRefCntPtr<DiagnosticOptions> DiagOpts = new DiagnosticOptions();
    OwningPtr<DiagnosticConsumer> DiagPrinter;
    if (DiagClient == 0) {
diff --git a/lib/Format/WhitespaceManager.cpp b/lib/Format/WhitespaceManager.cpp

new file mode 100644 (file)

index 0000000..6801e7e
--- /dev/null
+++ b/lib/Format/WhitespaceManager.cpp
@@ -0,0 +1,223 @@
+//===--- WhitespaceManager.cpp - Format C++ code --------------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// \brief This file implements WhitespaceManager class.
+///
+//===----------------------------------------------------------------------===//
+
+#include "WhitespaceManager.h"
+#include "llvm/ADT/STLExtras.h"
+
+namespace clang {
+namespace format {
+
+void WhitespaceManager::replaceWhitespace(const AnnotatedToken &Tok,
+                                          unsigned NewLines, unsigned Spaces,
+                                          unsigned WhitespaceStartColumn) {
+  // 2+ newlines mean an empty line separating logical scopes.
+  if (NewLines >= 2)
+    alignComments();
+
+  SourceLocation TokenLoc = Tok.FormatTok.Tok.getLocation();
+  bool LineExceedsColumnLimit =
+      Spaces + WhitespaceStartColumn + Tok.FormatTok.TokenLength >
+      Style.ColumnLimit;
+
+  // Align line comments if they are trailing or if they continue other
+  // trailing comments.
+  if (Tok.isTrailingComment()) {
+    // Remove the comment's trailing whitespace.
+    if (Tok.FormatTok.Tok.getLength() != Tok.FormatTok.TokenLength)
+      Replaces.insert(tooling::Replacement(
+          SourceMgr, TokenLoc.getLocWithOffset(Tok.FormatTok.TokenLength),
+          Tok.FormatTok.Tok.getLength() - Tok.FormatTok.TokenLength, ""));
+
+    // Align comment with other comments.
+    if ((Tok.Parent != NULL || !Comments.empty()) && !LineExceedsColumnLimit) {
+      StoredComment Comment;
+      Comment.Tok = Tok.FormatTok;
+      Comment.Spaces = Spaces;
+      Comment.NewLines = NewLines;
+      Comment.MinColumn =
+          NewLines > 0 ? Spaces : WhitespaceStartColumn + Spaces;
+      Comment.MaxColumn = Style.ColumnLimit - Tok.FormatTok.TokenLength;
+      Comment.Untouchable = false;
+      Comments.push_back(Comment);
+      return;
+    }
+  }
+
+  // If this line does not have a trailing comment, align the stored comments.
+  if (Tok.Children.empty() && !Tok.isTrailingComment())
+    alignComments();
+
+  if (Tok.Type == TT_LineComment && LineExceedsColumnLimit) {
+    StringRef Line(SourceMgr.getCharacterData(TokenLoc),
+                   Tok.FormatTok.TokenLength);
+    int StartColumn = Spaces + (NewLines == 0 ? WhitespaceStartColumn : 0);
+    StringRef Prefix = getLineCommentPrefix(Line);
+    std::string NewPrefix = std::string(StartColumn, ' ') + Prefix.str();
+    splitLineComment(Tok.FormatTok, Line.substr(Prefix.size()),
+                     StartColumn + Prefix.size(), NewPrefix);
+  }
+
+  storeReplacement(Tok.FormatTok, getNewLineText(NewLines, Spaces));
+}
+
+void WhitespaceManager::replacePPWhitespace(const AnnotatedToken &Tok,
+                                            unsigned NewLines, unsigned Spaces,
+                                            unsigned WhitespaceStartColumn) {
+  storeReplacement(Tok.FormatTok,
+                   getNewLineText(NewLines, Spaces, WhitespaceStartColumn));
+}
+
+void WhitespaceManager::breakToken(const FormatToken &Tok, unsigned Offset,
+                                   unsigned ReplaceChars, StringRef Prefix,
+                                   StringRef Postfix, bool InPPDirective,
+                                   unsigned Spaces,
+                                   unsigned WhitespaceStartColumn) {
+  std::string NewLineText;
+  if (!InPPDirective)
+    NewLineText = getNewLineText(1, Spaces);
+  else
+    NewLineText = getNewLineText(1, Spaces, WhitespaceStartColumn);
+  std::string ReplacementText = (Prefix + NewLineText + Postfix).str();
+  SourceLocation Location = Tok.Tok.getLocation().getLocWithOffset(Offset);
+  Replaces.insert(
+      tooling::Replacement(SourceMgr, Location, ReplaceChars, ReplacementText));
+}
+
+const tooling::Replacements &WhitespaceManager::generateReplacements() {
+  alignComments();
+  return Replaces;
+}
+
+void WhitespaceManager::addReplacement(const SourceLocation &SourceLoc,
+                                       unsigned ReplaceChars, StringRef Text) {
+  Replaces.insert(
+      tooling::Replacement(SourceMgr, SourceLoc, ReplaceChars, Text));
+}
+
+void WhitespaceManager::addUntouchableComment(unsigned Column) {
+  StoredComment Comment;
+  Comment.MinColumn = Column;
+  Comment.MaxColumn = Column;
+  Comment.Untouchable = true;
+  Comments.push_back(Comment);
+}
+
+StringRef WhitespaceManager::getLineCommentPrefix(StringRef Comment) {
+  const char *KnownPrefixes[] = { "/// ", "///", "// ", "//" };
+  for (size_t i = 0; i < llvm::array_lengthof(KnownPrefixes); ++i)
+    if (Comment.startswith(KnownPrefixes[i]))
+      return KnownPrefixes[i];
+  return "";
+}
+
+void
+WhitespaceManager::splitLineComment(const FormatToken &Tok, StringRef Line,
+                                    size_t StartColumn, StringRef LinePrefix,
+                                    const char *WhiteSpaceChars /*= " "*/) {
+  const char *TokenStart = SourceMgr.getCharacterData(Tok.Tok.getLocation());
+
+  StringRef TrimmedLine = Line.rtrim();
+  // Don't touch leading whitespace.
+  Line = TrimmedLine.ltrim();
+  StartColumn += TrimmedLine.size() - Line.size();
+
+  while (Line.size() + StartColumn > Style.ColumnLimit) {
+    // Try to break at the last whitespace before the column limit.
+    size_t SpacePos =
+        Line.find_last_of(WhiteSpaceChars, Style.ColumnLimit - StartColumn + 1);
+    if (SpacePos == StringRef::npos) {
+      // Try to find any whitespace in the line.
+      SpacePos = Line.find_first_of(WhiteSpaceChars);
+      if (SpacePos == StringRef::npos) // No whitespace found, give up.
+        break;
+    }
+
+    StringRef NextCut = Line.substr(0, SpacePos).rtrim();
+    StringRef RemainingLine = Line.substr(SpacePos).ltrim();
+    if (RemainingLine.empty())
+      break;
+
+    Line = RemainingLine;
+
+    size_t ReplaceChars = Line.begin() - NextCut.end();
+    breakToken(Tok, NextCut.end() - TokenStart, ReplaceChars, "", LinePrefix,
+               false, 0, 0);
+    StartColumn = LinePrefix.size();
+  }
+}
+
+std::string WhitespaceManager::getNewLineText(unsigned NewLines,
+                                              unsigned Spaces) {
+  return std::string(NewLines, '\n') + std::string(Spaces, ' ');
+}
+
+std::string WhitespaceManager::getNewLineText(unsigned NewLines,
+                                              unsigned Spaces,
+                                              unsigned WhitespaceStartColumn) {
+  std::string NewLineText;
+  if (NewLines > 0) {
+    unsigned Offset =
+        std::min<int>(Style.ColumnLimit - 1, WhitespaceStartColumn);
+    for (unsigned i = 0; i < NewLines; ++i) {
+      NewLineText += std::string(Style.ColumnLimit - Offset - 1, ' ');
+      NewLineText += "\\\n";
+      Offset = 0;
+    }
+  }
+  return NewLineText + std::string(Spaces, ' ');
+}
+
+void WhitespaceManager::alignComments() {
+  unsigned MinColumn = 0;
+  unsigned MaxColumn = UINT_MAX;
+  comment_iterator Start = Comments.begin();
+  for (comment_iterator I = Start, E = Comments.end(); I != E; ++I) {
+    if (I->MinColumn > MaxColumn || I->MaxColumn < MinColumn) {
+      alignComments(Start, I, MinColumn);
+      MinColumn = I->MinColumn;
+      MaxColumn = I->MaxColumn;
+      Start = I;
+    } else {
+      MinColumn = std::max(MinColumn, I->MinColumn);
+      MaxColumn = std::min(MaxColumn, I->MaxColumn);
+    }
+  }
+  alignComments(Start, Comments.end(), MinColumn);
+  Comments.clear();
+}
+
+void WhitespaceManager::alignComments(comment_iterator I, comment_iterator E,
+                                      unsigned Column) {
+  while (I != E) {
+    if (!I->Untouchable) {
+      unsigned Spaces = I->Spaces + Column - I->MinColumn;
+      storeReplacement(I->Tok, getNewLineText(I->NewLines, Spaces));
+    }
+    ++I;
+  }
+}
+
+void WhitespaceManager::storeReplacement(const FormatToken &Tok,
+                                         const std::string Text) {
+  // Don't create a replacement, if it does not change anything.
+  if (StringRef(SourceMgr.getCharacterData(Tok.WhiteSpaceStart),
+                Tok.WhiteSpaceLength) == Text)
+    return;
+
+  Replaces.insert(tooling::Replacement(SourceMgr, Tok.WhiteSpaceStart,
+                                       Tok.WhiteSpaceLength, Text));
+}
+
+} // namespace format
+} // namespace clang
diff --git a/lib/Format/WhitespaceManager.h b/lib/Format/WhitespaceManager.h

new file mode 100644 (file)

index 0000000..d880069
--- /dev/null
+++ b/lib/Format/WhitespaceManager.h
@@ -0,0 +1,120 @@
+//===--- WhitespaceManager.h - Format C++ code ----------------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// \brief WhitespaceManager class manages whitespace around tokens and their
+/// replacements.
+///
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CLANG_FORMAT_WHITESPACEMANAGER_H
+#define LLVM_CLANG_FORMAT_WHITESPACEMANAGER_H
+
+#include "TokenAnnotator.h"
+#include "clang/Basic/SourceManager.h"
+#include "clang/Format/Format.h"
+#include <string>
+
+namespace clang {
+namespace format {
+
+/// \brief Manages the whitespaces around tokens and their replacements.
+///
+/// This includes special handling for certain constructs, e.g. the alignment of
+/// trailing line comments.
+class WhitespaceManager {
+public:
+  WhitespaceManager(SourceManager &SourceMgr, const FormatStyle &Style)
+      : SourceMgr(SourceMgr), Style(Style) {}
+
+  /// \brief Replaces the whitespace in front of \p Tok. Only call once for
+  /// each \c AnnotatedToken.
+  void replaceWhitespace(const AnnotatedToken &Tok, unsigned NewLines,
+                         unsigned Spaces, unsigned WhitespaceStartColumn);
+
+  /// \brief Like \c replaceWhitespace, but additionally adds right-aligned
+  /// backslashes to escape newlines inside a preprocessor directive.
+  ///
+  /// This function and \c replaceWhitespace have the same behavior if
+  /// \p NewLines == 0.
+  void replacePPWhitespace(const AnnotatedToken &Tok, unsigned NewLines,
+                           unsigned Spaces, unsigned WhitespaceStartColumn);
+
+  /// \brief Inserts a line break into the middle of a token.
+  ///
+  /// Will break at \p Offset inside \p Tok, putting \p Prefix before the line
+  /// break and \p Postfix before the rest of the token starts in the next line.
+  ///
+  /// \p InPPDirective, \p Spaces, \p WhitespaceStartColumn and the style's
+  /// column limit are used to generate the correct line break.
+  void breakToken(const FormatToken &Tok, unsigned Offset,
+                  unsigned ReplaceChars, StringRef Prefix, StringRef Postfix,
+                  bool InPPDirective, unsigned Spaces,
+                  unsigned WhitespaceStartColumn);
+
+  /// \brief Returns all the \c Replacements created during formatting.
+  const tooling::Replacements &generateReplacements();
+
+  void addReplacement(const SourceLocation &SourceLoc, unsigned ReplaceChars,
+                      StringRef Text);
+
+  void addUntouchableComment(unsigned Column);
+
+private:
+  static StringRef getLineCommentPrefix(StringRef Comment);
+
+  /// \brief Splits one line in a line comment, if it doesn't fit within the
+  /// style's column limit. Removes trailing whitespace in each line.
+  ///
+  /// \param Line points to the line contents without leading // or /*.
+  ///
+  /// \param StartColumn is the column where the first character of Line will be
+  /// located after formatting.
+  ///
+  /// \param LinePrefix is inserted after each line break.
+  void splitLineComment(const FormatToken &Tok, StringRef Line,
+                        size_t StartColumn, StringRef LinePrefix,
+                        const char *WhiteSpaceChars = " ");
+
+  std::string getNewLineText(unsigned NewLines, unsigned Spaces);
+
+  std::string getNewLineText(unsigned NewLines, unsigned Spaces,
+                             unsigned WhitespaceStartColumn);
+
+  /// \brief Structure to store a comment for later layout and alignment.
+  struct StoredComment {
+    FormatToken Tok;
+    unsigned MinColumn;
+    unsigned MaxColumn;
+    unsigned NewLines;
+    unsigned Spaces;
+    bool Untouchable;
+  };
+  SmallVector<StoredComment, 16> Comments;
+  typedef SmallVector<StoredComment, 16>::iterator comment_iterator;
+
+  /// \brief Try to align all stashed comments.
+  void alignComments();
+
+  /// \brief Put all the comments between \p I and \p E into \p Column.
+  void alignComments(comment_iterator I, comment_iterator E, unsigned Column);
+
+  /// \brief Stores \p Text as the replacement for the whitespace in front of
+  /// \p Tok.
+  void storeReplacement(const FormatToken &Tok, const std::string Text);
+
+  SourceManager &SourceMgr;
+  tooling::Replacements Replaces;
+  const FormatStyle &Style;
+};
+
+} // namespace format
+} // namespace clang
+
+#endif // LLVM_CLANG_FORMAT_WHITESPACEMANAGER_H
diff --git a/unittests/Format/FormatTest.cpp b/unittests/Format/FormatTest.cpp

index 8dadd032ab790633a9a3f4f21cfeea90f3a2009b..cbf5cf98b0be1cbfa02027095f09ff2e0e930069 100644 (file)
--- a/unittests/Format/FormatTest.cpp
+++ b/unittests/Format/FormatTest.cpp
@@ -845,10 +845,7 @@ TEST_F(FormatTest, SplitsLongLinesInCommentsInPreprocessor) {
              "   Macro comment   \\\n"
              "   with a long     \\\n"
              "   line            \\\n"
-            // FIXME: We should look at the length of the last line of the token
-            // instead of the full token's length.
-            //"  */               \\\n"
-            "   */\\\n"
+            "   */              \\\n"
              "  A + B",
              format("#define X \\\n"
                     "  /*\n"
@@ -860,10 +857,7 @@ TEST_F(FormatTest, SplitsLongLinesInCommentsInPreprocessor) {
    EXPECT_EQ("#define X          \\\n"
              "  /* Macro comment \\\n"
              "     with a long   \\\n"
-            // FIXME: We should look at the length of the last line of the token
-            // instead of the full token's length.
-            //"   line */         \\\n"
-            "     line */\\\n"
+            "     line */       \\\n"
              "  A + B",
              format("#define X \\\n"
                     "  /* Macro comment with a long\n"
@@ -873,10 +867,7 @@ TEST_F(FormatTest, SplitsLongLinesInCommentsInPreprocessor) {
    EXPECT_EQ("#define X          \\\n"
              "  /* Macro comment \\\n"
              "   * with a long   \\\n"
-            // FIXME: We should look at the length of the last line of the token
-            // instead of the full token's length.
-            //"   * line */       \\\n"
-            "   * line */\\\n"
+            "   * line */       \\\n"
              "  A + B",
              format("#define X \\\n"
                     "  /* Macro comment with a long  line */ \\\n"
@@ -3697,7 +3688,8 @@ TEST_F(FormatTest, BreakStringLiterals) {
              "\"text\"",
              format("\"some text\"", getLLVMStyleWithColumns(7)));
    EXPECT_EQ("\"some\"\n"
-            "\" text\"",
+            "\" tex\"\n"
+            "\"t\"",
              format("\"some text\"", getLLVMStyleWithColumns(6)));
    EXPECT_EQ("\"some\"\n"
              "\" tex\"\n"
@@ -3790,7 +3782,8 @@ TEST_F(FormatTest, DoNotBreakStringLiteralsInEscapeSequence) {
              "\"000001\"",
              format("\"test\\000000000001\"", getLLVMStyleWithColumns(9)));
    EXPECT_EQ("\"test\\000\"\n"
-            "\"000000001\"",
+            "\"00000000\"\n"
+            "\"1\"",
              format("\"test\\000000000001\"", getLLVMStyleWithColumns(10)));
    EXPECT_EQ("R\"(\\x\\x00)\"\n",
              format("R\"(\\x\\x00)\"\n", getLLVMStyleWithColumns(7)));
author	Alexander Kornienko <alexfh@google.com>
	Mon, 15 Apr 2013 14:28:00 +0000 (14:28 +0000)
committer	Alexander Kornienko <alexfh@google.com>
	Mon, 15 Apr 2013 14:28:00 +0000 (14:28 +0000)
lib/Format/BreakableToken.cpp	[new file with mode: 0644]	patch \| blob
lib/Format/BreakableToken.h	[new file with mode: 0644]	patch \| blob
lib/Format/CMakeLists.txt		patch \| blob \| history
lib/Format/Format.cpp		patch \| blob \| history
lib/Format/WhitespaceManager.cpp	[new file with mode: 0644]	patch \| blob
lib/Format/WhitespaceManager.h	[new file with mode: 0644]	patch \| blob
unittests/Format/FormatTest.cpp		patch \| blob \| history