From: Alexander Kornienko <alexfh@google.com>
Date: Mon, 2 Sep 2013 13:58:14 +0000 (+0000)
Subject: Store first and last newline position in the token text for string literals and comments.
X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=4b762a91faac0473fa380ea9731992b24867e2e6;p=clang

Store first and last newline position in the token text for string literals and comments.

Summary:
Precompute, when a string literal or comment token is lexed, the number of code
points in its first and last lines (the text before the first newline and after
the last newline), to avoid doing .find('\n') for each possible solution.

Reviewers: djasper

Reviewed By: djasper

CC: cfe-commits, klimek

Differential Revision: http://llvm-reviews.chandlerc.com/D1556

git-svn-id: https://llvm.org/svn/llvm-project/cfe/trunk@189758 91177308-0d34-0410-b5e6-96231b3b80d8
---

diff --git a/lib/Format/ContinuationIndenter.cpp b/lib/Format/ContinuationIndenter.cpp
index 878580e892..9891cb2e09 100644
--- a/lib/Format/ContinuationIndenter.cpp
+++ b/lib/Format/ContinuationIndenter.cpp
@@ -583,23 +583,16 @@ unsigned ContinuationIndenter::moveStateToNextToken(LineState &State,
 unsigned
 ContinuationIndenter::addMultilineStringLiteral(const FormatToken &Current,
                                                 LineState &State) {
-  StringRef Text = Current.TokenText;
-  // We can only affect layout of the first and the last line, so the penalty
-  // for all other lines is constant, and we ignore it.
-  size_t FirstLineBreak = Text.find('\n');
-  size_t LastLineBreak = Text.find_last_of('\n');
-  assert(FirstLineBreak != StringRef::npos);
-  unsigned StartColumn = State.Column - Current.CodePointCount;
-  State.Column =
-      encoding::getCodePointCount(Text.substr(LastLineBreak + 1), Encoding);
-
   // Break before further function parameters on all levels.
   for (unsigned i = 0, e = State.Stack.size(); i != e; ++i)
     State.Stack[i].BreakBeforeParameter = true;
 
   unsigned ColumnsUsed =
-      StartColumn +
-      encoding::getCodePointCount(Text.substr(0, FirstLineBreak), Encoding);
+      State.Column - Current.CodePointCount + Current.CodePointsInFirstLine;
+  // We can only affect layout of the first and the last line, so the penalty
+  // for all other lines is constant, and we ignore it.
+  State.Column = Current.CodePointsInLastLine;
+
   if (ColumnsUsed > getColumnLimit())
     return Style.PenaltyExcessCharacter * (ColumnsUsed - getColumnLimit());
   return 0;
@@ -619,7 +612,7 @@ unsigned ContinuationIndenter::breakProtrudingToken(const FormatToken &Current,
     // Don't break string literals with (in case of non-raw strings, escaped)
     // newlines. As clang-format must not change the string's content, it is
     // unlikely that we'll end up with a better format.
-    if (Current.IsMultiline)
+    if (Current.isMultiline())
       return addMultilineStringLiteral(Current, State);
 
     // Only break up default narrow strings.
@@ -649,14 +642,8 @@ unsigned ContinuationIndenter::breakProtrudingToken(const FormatToken &Current,
     // FIXME: If we want to handle them correctly, we'll need to adjust
     // leading whitespace in consecutive lines when changing indentation of
     // the first line similar to what we do with block comments.
-    if (Current.IsMultiline) {
-      StringRef::size_type EscapedNewlinePos = Current.TokenText.find("\\\n");
-      assert(EscapedNewlinePos != StringRef::npos);
-      State.Column =
-          StartColumn +
-          encoding::getCodePointCount(
-              Current.TokenText.substr(0, EscapedNewlinePos), Encoding) +
-          1;
+    if (Current.isMultiline()) {
+      State.Column = StartColumn + Current.CodePointsInFirstLine;
       return 0;
     }
 
@@ -740,7 +727,7 @@ bool ContinuationIndenter::NextIsMultilineString(const LineState &State) {
   // AlwaysBreakBeforeMultilineStrings implementation.
   if (Current.TokenText.startswith("R\""))
     return false;
-  if (Current.IsMultiline)
+  if (Current.isMultiline())
     return true;
   if (Current.getNextNonComment() &&
       Current.getNextNonComment()->is(tok::string_literal))
diff --git a/lib/Format/Format.cpp b/lib/Format/Format.cpp
index 84bf36c7fd..39d2c0f96c 100644
--- a/lib/Format/Format.cpp
+++ b/lib/Format/Format.cpp
@@ -596,10 +596,16 @@ private:
     FormatTok->CodePointCount =
         encoding::getCodePointCount(FormatTok->TokenText, Encoding);
 
-    if (FormatTok->isOneOf(tok::string_literal, tok::comment) &&
-        FormatTok->TokenText.find('\n') != StringRef::npos)
-      FormatTok->IsMultiline = true;
-
+    if (FormatTok->isOneOf(tok::string_literal, tok::comment)) {
+      StringRef Text = FormatTok->TokenText;
+      size_t FirstNewlinePos = Text.find('\n');
+      if (FirstNewlinePos != StringRef::npos) {
+        FormatTok->CodePointsInFirstLine = encoding::getCodePointCount(
+            Text.substr(0, FirstNewlinePos), Encoding);
+        FormatTok->CodePointsInLastLine = encoding::getCodePointCount(
+            Text.substr(Text.find_last_of('\n') + 1), Encoding);
+      }
+    }
     // FIXME: Add the CodePointCount to Column.
     FormatTok->WhitespaceRange = SourceRange(
         WhitespaceStart, WhitespaceStart.getLocWithOffset(WhitespaceLength));
diff --git a/lib/Format/FormatToken.h b/lib/Format/FormatToken.h
index 6006ec87c0..24d4c59e61 100644
--- a/lib/Format/FormatToken.h
+++ b/lib/Format/FormatToken.h
@@ -80,9 +80,9 @@ class TokenRole;
 /// whitespace characters preceeding it.
 struct FormatToken {
   FormatToken()
-      : NewlinesBefore(0), HasUnescapedNewline(false), IsMultiline(false),
-        LastNewlineOffset(0), CodePointCount(0), IsFirst(false),
-        MustBreakBefore(false), IsUnterminatedLiteral(false),
+      : NewlinesBefore(0), HasUnescapedNewline(false), LastNewlineOffset(0),
+        CodePointCount(0), CodePointsInFirstLine(0), CodePointsInLastLine(0),
+        IsFirst(false), MustBreakBefore(false), IsUnterminatedLiteral(false),
         BlockKind(BK_Unknown), Type(TT_Unknown), SpacesRequiredBefore(0),
         CanBreakBefore(false), ClosesTemplateDeclaration(false),
         ParameterCount(0), PackingKind(PPK_Inconclusive), TotalLength(0),
@@ -104,9 +104,6 @@ struct FormatToken {
   /// Token.
   bool HasUnescapedNewline;
 
-  /// \brief Whether the token text contains newlines (escaped or not).
-  bool IsMultiline;
-
   /// \brief The range of the whitespace immediately preceeding the \c Token.
   SourceRange WhitespaceRange;
 
@@ -118,6 +115,19 @@ struct FormatToken {
   /// We need this to correctly measure number of columns a token spans.
   unsigned CodePointCount;
 
+  /// \brief Contains the number of code points in the first line of a
+  /// multi-line string literal or comment. Zero if there's no newline in the
+  /// token.
+  unsigned CodePointsInFirstLine;
+
+  /// \brief Contains the number of code points in the last line of a
+  /// multi-line string literal or comment. Can be zero for line comments.
+  unsigned CodePointsInLastLine;
+
+  /// \brief Returns \c true if the token text contains newlines (escaped or
+  /// not).
+  bool isMultiline() const { return CodePointsInFirstLine != 0; }
+
   /// \brief Indicates that this is the first token.
   bool IsFirst;
 
diff --git a/lib/Format/TokenAnnotator.cpp b/lib/Format/TokenAnnotator.cpp
index 5b9802ddd6..38a1c1a5e1 100644
--- a/lib/Format/TokenAnnotator.cpp
+++ b/lib/Format/TokenAnnotator.cpp
@@ -1025,7 +1025,7 @@ void TokenAnnotator::calculateFormattingInformation(AnnotatedLine &Line) {
     Current->CanBreakBefore =
         Current->MustBreakBefore || canBreakBefore(Line, *Current);
     if (Current->MustBreakBefore ||
-        (Current->is(tok::string_literal) && Current->IsMultiline))
+        (Current->is(tok::string_literal) && Current->isMultiline()))
       Current->TotalLength = Current->Previous->TotalLength + Style.ColumnLimit;
     else
       Current->TotalLength = Current->Previous->TotalLength +