From: Manuel Klimek <klimek@google.com>
Date: Mon, 7 Jan 2013 07:56:50 +0000 (+0000)
Subject: Fix layouting of tokens with a leading escaped newline.
X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=9541938d4e3b9cbafdf0e1a28900c535b82767a4;p=clang

Fix layouting of tokens with a leading escaped newline.

If a token follows directly on an escaped newline, the escaped newline
is stored with the token. Since we re-layout escaped newlines, we need
to treat them just like normal whitespace - thus, we need to increase
the whitespace-length of the token, while decreasing the token length
(otherwise the token length contains the length of the escaped newline
and we double-count it while indenting).

git-svn-id: https://llvm.org/svn/llvm-project/cfe/trunk@171706 91177308-0d34-0410-b5e6-96231b3b80d8
---

diff --git a/lib/Format/Format.cpp b/lib/Format/Format.cpp
index c6ff880186..a6beff0e4f 100644
--- a/lib/Format/Format.cpp
+++ b/lib/Format/Format.cpp
@@ -131,9 +131,12 @@ public:
     bool FitsOnALine = true;
     for (unsigned i = 1, n = Line.Tokens.size(); i != n; ++i) {
       Columns += (Annotations[i].SpaceRequiredBefore ? 1 : 0) +
-                 Line.Tokens[i].Tok.getLength();
+                 Line.Tokens[i].TokenLength;
       // A special case for the colon of a constructor initializer as this only
       // needs to be put on a new line if the line needs to be split.
+      // FIXME: We need to check whether we're in a preprocessor directive, even
+      // if all tokens fit - the next line might be a preprocessor directive,
+      // too, in which case we need to account for the possible escaped newline.
       if (Columns > Style.ColumnLimit ||
           (Annotations[i].MustBreakBefore &&
            Annotations[i].Type != TokenAnnotation::TT_CtorInitializerColon)) {
@@ -247,7 +250,7 @@ private:
       unsigned WhitespaceStartColumn = State.Column;
       if (Current.Tok.is(tok::string_literal) &&
           Previous.Tok.is(tok::string_literal)) {
-        State.Column = State.Column - Previous.Tok.getLength();
+        State.Column = State.Column - Previous.TokenLength;
       } else if (Current.Tok.is(tok::lessless) &&
                  State.FirstLessLess[ParenLevel] != 0) {
         State.Column = State.FirstLessLess[ParenLevel];
@@ -286,7 +289,7 @@ private:
         State.Indent[ParenLevel] += 2;
     } else {
       if (Current.Tok.is(tok::equal) && Line.Tokens[0].Tok.is(tok::kw_for))
-        State.ForLoopVariablePos = State.Column - Previous.Tok.getLength();
+        State.ForLoopVariablePos = State.Column - Previous.TokenLength;
 
       unsigned Spaces = Annotations[Index].SpaceRequiredBefore ? 1 : 0;
       if (Annotations[Index].Type == TokenAnnotation::TT_LineComment)
@@ -321,7 +324,7 @@ private:
     if (Current.Tok.is(tok::lessless) && State.FirstLessLess[ParenLevel] == 0)
       State.FirstLessLess[ParenLevel] = State.Column;
 
-    State.Column += Current.Tok.getLength();
+    State.Column += Current.TokenLength;
 
     // If we encounter an opening (, [, { or <, we add a level to our stacks to
     // prepare for the following tokens.
@@ -862,6 +865,7 @@ private:
       } else if (isBinaryOperator(Line.Tokens[i])) {
         Annotation.Type = TokenAnnotation::TT_BinaryOperator;
       } else if (Tok.Tok.is(tok::comment)) {
+        // FIXME: Use Lexer::getSpelling(Tok, SourceMgr, LangOpts, bool*);
         StringRef Data(SourceMgr.getCharacterData(Tok.Tok.getLocation()),
                        Tok.Tok.getLength());
         if (Data.startswith("//"))
@@ -1031,7 +1035,7 @@ public:
 
     FormatTok = FormatToken();
     Lex.LexFromRawLexer(FormatTok.Tok);
-    StringRef Text = tokenText(FormatTok.Tok);
+    StringRef Text = rawTokenText(FormatTok.Tok);
     FormatTok.WhiteSpaceStart = FormatTok.Tok.getLocation();
     if (SourceMgr.getFileOffset(FormatTok.WhiteSpaceStart) == 0)
       FormatTok.IsFirst = true;
@@ -1046,8 +1050,12 @@ public:
       if (FormatTok.Tok.is(tok::eof))
         return FormatTok;
       Lex.LexFromRawLexer(FormatTok.Tok);
-      Text = tokenText(FormatTok.Tok);
+      Text = rawTokenText(FormatTok.Tok);
     }
+
+    // Now FormatTok is the next non-whitespace token.
+    FormatTok.TokenLength = Text.size();
+
     // In case the token starts with escaped newlines, we want to
     // take them into account as whitespace - this pattern is quite frequent
     // in macro definitions.
@@ -1057,6 +1065,7 @@ public:
     unsigned i = 0;
     while (i + 1 < Text.size() && Text[i] == '\\' && Text[i+1] == '\n') {
       FormatTok.WhiteSpaceLength += 2;
+      FormatTok.TokenLength -= 2;
       i += 2;
     }
 
@@ -1082,7 +1091,7 @@ private:
   IdentifierTable IdentTable;
 
   /// Returns the text of \c FormatTok.
-  StringRef tokenText(Token &Tok) {
+  StringRef rawTokenText(Token &Tok) {
     return StringRef(SourceMgr.getCharacterData(Tok.getLocation()),
                      Tok.getLength());
   }
diff --git a/lib/Format/UnwrappedLineParser.h b/lib/Format/UnwrappedLineParser.h
index 2308c92fa1..59844e4971 100644
--- a/lib/Format/UnwrappedLineParser.h
+++ b/lib/Format/UnwrappedLineParser.h
@@ -32,7 +32,7 @@ namespace format {
 struct FormatToken {
   FormatToken()
       : NewlinesBefore(0), HasUnescapedNewline(false), WhiteSpaceLength(0),
-        IsFirst(false) {
+        TokenLength(0), IsFirst(false) {
   }
 
   /// \brief The \c Token.
@@ -58,6 +58,11 @@ struct FormatToken {
   /// the \c Token.
   unsigned WhiteSpaceLength;
 
+  /// \brief The length of the non-whitespace parts of the token. This is
+  /// necessary because we need to handle escaped newlines that are stored
+  /// with the token.
+  unsigned TokenLength;
+
   /// \brief Indicates that this is the first token.
   bool IsFirst;
 };
diff --git a/unittests/Format/FormatTest.cpp b/unittests/Format/FormatTest.cpp
index de794dabb6..3e672ae5a3 100644
--- a/unittests/Format/FormatTest.cpp
+++ b/unittests/Format/FormatTest.cpp
@@ -481,6 +481,12 @@ TEST_F(FormatTest, FormatUnbalancedStructuralElements) {
             format("#define A } }\nint i;", getLLVMStyleWithColumns(11)));
 }
 
+TEST_F(FormatTest, EscapedNewlineAtStartOfTokenInMacroDefinition) {
+  EXPECT_EQ("#define A \\\n  int i;  \\\n  int j;",
+            format("#define A \\\nint i;\\\n  int j;",
+                   getLLVMStyleWithColumns(11)));
+}
+
 TEST_F(FormatTest, MixingPreprocessorDirectivesAndNormalCode) {
   EXPECT_EQ(
       "#define ALooooooooooooooooooooooooooooooooooooooongMacro("