Store first and last newline position in the token text for string literals and comments.

author Alexander Kornienko <alexfh@google.com>

Mon, 2 Sep 2013 13:58:14 +0000 (13:58 +0000)

committer Alexander Kornienko <alexfh@google.com>

Mon, 2 Sep 2013 13:58:14 +0000 (13:58 +0000)
author Alexander Kornienko <alexfh@google.com>
Mon, 2 Sep 2013 13:58:14 +0000 (13:58 +0000)
committer Alexander Kornienko <alexfh@google.com>
Mon, 2 Sep 2013 13:58:14 +0000 (13:58 +0000)
diff --git a/lib/Format/ContinuationIndenter.cpp b/lib/Format/ContinuationIndenter.cpp

index 878580e8920d30faf59e06dff0295d32188d7294..9891cb2e09c06b7aa9d143fb954ee5de74fe3e60 100644 (file)
--- a/lib/Format/ContinuationIndenter.cpp
+++ b/lib/Format/ContinuationIndenter.cpp
@@ -583,23 +583,16 @@ unsigned ContinuationIndenter::moveStateToNextToken(LineState &State,
  unsigned
  ContinuationIndenter::addMultilineStringLiteral(const FormatToken &Current,
                                                  LineState &State) {
-  StringRef Text = Current.TokenText;
-  // We can only affect layout of the first and the last line, so the penalty
-  // for all other lines is constant, and we ignore it.
-  size_t FirstLineBreak = Text.find('\n');
-  size_t LastLineBreak = Text.find_last_of('\n');
-  assert(FirstLineBreak != StringRef::npos);
-  unsigned StartColumn = State.Column - Current.CodePointCount;
-  State.Column =
-      encoding::getCodePointCount(Text.substr(LastLineBreak + 1), Encoding);
-
    // Break before further function parameters on all levels.
    for (unsigned i = 0, e = State.Stack.size(); i != e; ++i)
      State.Stack[i].BreakBeforeParameter = true;
  
    unsigned ColumnsUsed =
-      StartColumn +
-      encoding::getCodePointCount(Text.substr(0, FirstLineBreak), Encoding);
+      State.Column - Current.CodePointCount + Current.CodePointsInFirstLine;
+  // We can only affect layout of the first and the last line, so the penalty
+  // for all other lines is constant, and we ignore it.
+  State.Column = Current.CodePointsInLastLine;
+
    if (ColumnsUsed > getColumnLimit())
      return Style.PenaltyExcessCharacter * (ColumnsUsed - getColumnLimit());
    return 0;
@@ -619,7 +612,7 @@ unsigned ContinuationIndenter::breakProtrudingToken(const FormatToken &Current,
      // Don't break string literals with (in case of non-raw strings, escaped)
      // newlines. As clang-format must not change the string's content, it is
      // unlikely that we'll end up with a better format.
-    if (Current.IsMultiline)
+    if (Current.isMultiline())
        return addMultilineStringLiteral(Current, State);
  
      // Only break up default narrow strings.
@@ -649,14 +642,8 @@ unsigned ContinuationIndenter::breakProtrudingToken(const FormatToken &Current,
      // FIXME: If we want to handle them correctly, we'll need to adjust
      // leading whitespace in consecutive lines when changing indentation of
      // the first line similar to what we do with block comments.
-    if (Current.IsMultiline) {
-      StringRef::size_type EscapedNewlinePos = Current.TokenText.find("\\\n");
-      assert(EscapedNewlinePos != StringRef::npos);
-      State.Column =
-          StartColumn +
-          encoding::getCodePointCount(
-              Current.TokenText.substr(0, EscapedNewlinePos), Encoding) +
-          1;
+    if (Current.isMultiline()) {
+      State.Column = StartColumn + Current.CodePointsInFirstLine;
        return 0;
      }
  
@@ -740,7 +727,7 @@ bool ContinuationIndenter::NextIsMultilineString(const LineState &State) {
    // AlwaysBreakBeforeMultilineStrings implementation.
    if (Current.TokenText.startswith("R\""))
      return false;
-  if (Current.IsMultiline)
+  if (Current.isMultiline())
      return true;
    if (Current.getNextNonComment() &&
        Current.getNextNonComment()->is(tok::string_literal))
diff --git a/lib/Format/Format.cpp b/lib/Format/Format.cpp

index 84bf36c7fd2adeb69d30e7fdb716488fe3876950..39d2c0f96c3667ed6443bd2c6eed54fae4c7f2b9 100644 (file)
--- a/lib/Format/Format.cpp
+++ b/lib/Format/Format.cpp
@@ -596,10 +596,16 @@ private:
      FormatTok->CodePointCount =
          encoding::getCodePointCount(FormatTok->TokenText, Encoding);
  
-    if (FormatTok->isOneOf(tok::string_literal, tok::comment) &&
-        FormatTok->TokenText.find('\n') != StringRef::npos)
-      FormatTok->IsMultiline = true;
-
+    if (FormatTok->isOneOf(tok::string_literal, tok::comment)) {
+      StringRef Text = FormatTok->TokenText;
+      size_t FirstNewlinePos = Text.find('\n');
+      if (FirstNewlinePos != StringRef::npos) {
+        FormatTok->CodePointsInFirstLine = encoding::getCodePointCount(
+            Text.substr(0, FirstNewlinePos), Encoding);
+        FormatTok->CodePointsInLastLine = encoding::getCodePointCount(
+            Text.substr(Text.find_last_of('\n') + 1), Encoding);
+      }
+    }
      // FIXME: Add the CodePointCount to Column.
      FormatTok->WhitespaceRange = SourceRange(
          WhitespaceStart, WhitespaceStart.getLocWithOffset(WhitespaceLength));
diff --git a/lib/Format/FormatToken.h b/lib/Format/FormatToken.h

index 6006ec87c03d9d33b13e1f5d393f4f3b915459c7..24d4c59e61adc6392e51f3b3f94eca5fbf70015a 100644 (file)
--- a/lib/Format/FormatToken.h
+++ b/lib/Format/FormatToken.h
@@ -80,9 +80,9 @@ class TokenRole;
  /// whitespace characters preceding it.
  struct FormatToken {
    FormatToken()
-      : NewlinesBefore(0), HasUnescapedNewline(false), IsMultiline(false),
-        LastNewlineOffset(0), CodePointCount(0), IsFirst(false),
-        MustBreakBefore(false), IsUnterminatedLiteral(false),
+      : NewlinesBefore(0), HasUnescapedNewline(false), LastNewlineOffset(0),
+        CodePointCount(0), CodePointsInFirstLine(0), CodePointsInLastLine(0),
+        IsFirst(false), MustBreakBefore(false), IsUnterminatedLiteral(false),
          BlockKind(BK_Unknown), Type(TT_Unknown), SpacesRequiredBefore(0),
          CanBreakBefore(false), ClosesTemplateDeclaration(false),
          ParameterCount(0), PackingKind(PPK_Inconclusive), TotalLength(0),
@@ -104,9 +104,6 @@ struct FormatToken {
    /// Token.
    bool HasUnescapedNewline;
  
-  /// \brief Whether the token text contains newlines (escaped or not).
-  bool IsMultiline;
-
    /// \brief The range of the whitespace immediately preceding the \c Token.
    SourceRange WhitespaceRange;
  
@@ -118,6 +115,19 @@ struct FormatToken {
    /// We need this to correctly measure number of columns a token spans.
    unsigned CodePointCount;
  
+  /// \brief Contains the number of code points in the first line of a
+  /// multi-line string literal or comment. Zero if there's no newline in the
+  /// token.
+  unsigned CodePointsInFirstLine;
+
+  /// \brief Contains the number of code points in the last line of a
+  /// multi-line string literal or comment. Can be zero for line comments.
+  unsigned CodePointsInLastLine;
+
+  /// \brief Returns \c true if the token text contains newlines (escaped or
+  /// not).
+  bool isMultiline() const { return CodePointsInFirstLine != 0; }
+
    /// \brief Indicates that this is the first token.
    bool IsFirst;
  
diff --git a/lib/Format/TokenAnnotator.cpp b/lib/Format/TokenAnnotator.cpp

index 5b9802ddd6ed3f8f6c28972a698c6d270a68a031..38a1c1a5e1e23a1dee6c5d3ee560e0b49dc0f8a2 100644 (file)
--- a/lib/Format/TokenAnnotator.cpp
+++ b/lib/Format/TokenAnnotator.cpp
@@ -1025,7 +1025,7 @@ void TokenAnnotator::calculateFormattingInformation(AnnotatedLine &Line) {
      Current->CanBreakBefore =
          Current->MustBreakBefore || canBreakBefore(Line, *Current);
      if (Current->MustBreakBefore ||
-        (Current->is(tok::string_literal) && Current->IsMultiline))
+        (Current->is(tok::string_literal) && Current->isMultiline()))
        Current->TotalLength = Current->Previous->TotalLength + Style.ColumnLimit;
      else
        Current->TotalLength = Current->Previous->TotalLength +
author	Alexander Kornienko <alexfh@google.com>
	Mon, 2 Sep 2013 13:58:14 +0000 (13:58 +0000)
committer	Alexander Kornienko <alexfh@google.com>
	Mon, 2 Sep 2013 13:58:14 +0000 (13:58 +0000)
lib/Format/ContinuationIndenter.cpp		patch \| blob \| history
lib/Format/Format.cpp		patch \| blob \| history
lib/Format/FormatToken.h		patch \| blob \| history
lib/Format/TokenAnnotator.cpp		patch \| blob \| history