From 526ed11ad9743c773df76bd1649d33fb92c2b8cb Mon Sep 17 00:00:00 2001
From: Manuel Klimek <klimek@google.com>
Date: Wed, 9 Jan 2013 15:25:02 +0000
Subject: [PATCH] Enables layouting unwrapped lines around preprocessor
 directives.

Previously, we'd always start at indent level 0 after a preprocessor
directive, now we layout the following snippet (column limit 69) as
follows:

functionCallTo(someOtherFunction(
    withSomeParameters, whichInSequence,
    areLongerThanALine(andAnotherCall,
  B
                       withMoreParamters,
                       whichStronglyInfluenceTheLayout),
    andMoreParameters),
               trailing);

Note that the different jumping indent is a different issue that will be
addressed separately.

This is the first step towards handling #ifdef->#else->#endif chains
correctly.

git-svn-id: https://llvm.org/svn/llvm-project/cfe/trunk@171974 91177308-0d34-0410-b5e6-96231b3b80d8
---
 lib/Format/Format.cpp              | 21 ++++----
 lib/Format/UnwrappedLineParser.cpp | 78 ++++++++++++++++++------------
 lib/Format/UnwrappedLineParser.h   | 11 ++++-
 unittests/Format/FormatTest.cpp    | 36 +++++++++++++-
 4 files changed, 104 insertions(+), 42 deletions(-)

diff --git a/lib/Format/Format.cpp b/lib/Format/Format.cpp
index 1854d70d90..e271ba2e13 100644
--- a/lib/Format/Format.cpp
+++ b/lib/Format/Format.cpp
@@ -803,18 +803,21 @@ public:
   void calculateExtraInformation(AnnotatedToken &Current) {
     Current.SpaceRequiredBefore = spaceRequiredBefore(Current);
 
-    if (Current.Type == TT_CtorInitializerColon || Current.Parent->Type ==
-        TT_LineComment || (Current.is(tok::string_literal) &&
-                           Current.Parent->is(tok::string_literal))) {
-      Current.MustBreakBefore = true;
-    } else if (Current.is(tok::at) && Current.Parent->Parent->is(tok::at)) {
-      // Don't put two objc's '@' on the same line. This could happen,
-      // as in, @optional @property ...
+    if (Current.FormatTok.MustBreakBefore) {
       Current.MustBreakBefore = true;
     } else {
-      Current.MustBreakBefore = false;
+      if (Current.Type == TT_CtorInitializerColon || Current.Parent->Type ==
+          TT_LineComment || (Current.is(tok::string_literal) &&
+                             Current.Parent->is(tok::string_literal))) {
+        Current.MustBreakBefore = true;
+      } else if (Current.is(tok::at) && Current.Parent->Parent->is(tok::at)) {
+        // Don't put two objc's '@' on the same line. This could happen,
+        // as in, @optional @property ...
+        Current.MustBreakBefore = true;
+      } else {
+        Current.MustBreakBefore = false;
+      }
     }
-
     Current.CanBreakBefore = Current.MustBreakBefore || canBreakBefore(Current);
 
     if (!Current.Children.empty())
diff --git a/lib/Format/UnwrappedLineParser.cpp b/lib/Format/UnwrappedLineParser.cpp
index 905758993e..99d5f6279d 100644
--- a/lib/Format/UnwrappedLineParser.cpp
+++ b/lib/Format/UnwrappedLineParser.cpp
@@ -74,8 +74,9 @@ private:
 UnwrappedLineParser::UnwrappedLineParser(const FormatStyle &Style,
                                          FormatTokenSource &Tokens,
                                          UnwrappedLineConsumer &Callback)
-    : RootTokenInitialized(false), Style(Style), Tokens(&Tokens),
-      Callback(Callback) {
+    : Line(new UnwrappedLine), RootTokenInitialized(false),
+      LastInCurrentLine(NULL), MustBreakBeforeNextToken(false), Style(Style),
+      Tokens(&Tokens), Callback(Callback) {
 }
 
 bool UnwrappedLineParser::parse() {
@@ -126,9 +127,9 @@ bool UnwrappedLineParser::parseBlock(unsigned AddLevels) {
 
   addUnwrappedLine();
 
-  Line.Level += AddLevels;
+  Line->Level += AddLevels;
   parseLevel(/*HasOpeningBrace=*/true);
-  Line.Level -= AddLevels;
+  Line->Level -= AddLevels;
 
   if (!FormatTok.Tok.is(tok::r_brace))
     return true;
@@ -139,7 +140,7 @@ bool UnwrappedLineParser::parseBlock(unsigned AddLevels) {
 
 void UnwrappedLineParser::parsePPDirective() {
   assert(FormatTok.Tok.is(tok::hash) && "'#' expected");
-  ScopedMacroState MacroState(Line, Tokens, FormatTok);
+  ScopedMacroState MacroState(*Line, Tokens, FormatTok);
   nextToken();
 
   if (FormatTok.Tok.getIdentifierInfo() == NULL) {
@@ -169,7 +170,7 @@ void UnwrappedLineParser::parsePPDefine() {
     parseParens();
   }
   addUnwrappedLine();
-  Line.Level = 1;
+  Line->Level = 1;
 
   // Errors during a preprocessor directive can only affect the layout of the
   // preprocessor directive, and thus we ignore them. An alternative approach
@@ -319,9 +320,9 @@ void UnwrappedLineParser::parseIfThenElse() {
     NeedsUnwrappedLine = true;
   } else {
     addUnwrappedLine();
-    ++Line.Level;
+    ++Line->Level;
     parseStructuralElement();
-    --Line.Level;
+    --Line->Level;
   }
   if (FormatTok.Tok.is(tok::kw_else)) {
     nextToken();
@@ -332,9 +333,9 @@ void UnwrappedLineParser::parseIfThenElse() {
       parseIfThenElse();
     } else {
       addUnwrappedLine();
-      ++Line.Level;
+      ++Line->Level;
       parseStructuralElement();
-      --Line.Level;
+      --Line->Level;
     }
   } else if (NeedsUnwrappedLine) {
     addUnwrappedLine();
@@ -363,9 +364,9 @@ void UnwrappedLineParser::parseForOrWhileLoop() {
     addUnwrappedLine();
   } else {
     addUnwrappedLine();
-    ++Line.Level;
+    ++Line->Level;
     parseStructuralElement();
-    --Line.Level;
+    --Line->Level;
   }
 }
 
@@ -376,9 +377,9 @@ void UnwrappedLineParser::parseDoWhile() {
     parseBlock();
   } else {
     addUnwrappedLine();
-    ++Line.Level;
+    ++Line->Level;
     parseStructuralElement();
-    --Line.Level;
+    --Line->Level;
   }
 
   // FIXME: Add error handling.
@@ -395,14 +396,14 @@ void UnwrappedLineParser::parseLabel() {
   // FIXME: remove all asserts.
   assert(FormatTok.Tok.is(tok::colon) && "':' expected");
   nextToken();
-  unsigned OldLineLevel = Line.Level;
-  if (Line.Level > 0)
-    --Line.Level;
+  unsigned OldLineLevel = Line->Level;
+  if (Line->Level > 0)
+    --Line->Level;
   if (FormatTok.Tok.is(tok::l_brace)) {
     parseBlock();
   }
   addUnwrappedLine();
-  Line.Level = OldLineLevel;
+  Line->Level = OldLineLevel;
 }
 
 void UnwrappedLineParser::parseCaseLabel() {
@@ -423,9 +424,9 @@ void UnwrappedLineParser::parseSwitch() {
     addUnwrappedLine();
   } else {
     addUnwrappedLine();
-    Line.Level += (Style.IndentCaseLabels ? 2 : 1);
+    Line->Level += (Style.IndentCaseLabels ? 2 : 1);
     parseStructuralElement();
-    Line.Level -= (Style.IndentCaseLabels ? 2 : 1);
+    Line->Level -= (Style.IndentCaseLabels ? 2 : 1);
   }
 }
 
@@ -444,7 +445,7 @@ void UnwrappedLineParser::parseEnum() {
     case tok::l_brace:
       nextToken();
       addUnwrappedLine();
-      ++Line.Level;
+      ++Line->Level;
       parseComments();
       break;
     case tok::l_paren:
@@ -458,7 +459,7 @@ void UnwrappedLineParser::parseEnum() {
     case tok::r_brace:
       if (HasContents)
         addUnwrappedLine();
-      --Line.Level;
+      --Line->Level;
       nextToken();
       break;
     case tok::semi:
@@ -501,8 +502,9 @@ void UnwrappedLineParser::addUnwrappedLine() {
          FormatTok.Tok.is(tok::comment)) {
     nextToken();
   }
-  Callback.consumeUnwrappedLine(Line);
+  Callback.consumeUnwrappedLine(*Line);
   RootTokenInitialized = false;
+  LastInCurrentLine = NULL;
 }
 
 bool UnwrappedLineParser::eof() const {
@@ -513,26 +515,42 @@ void UnwrappedLineParser::nextToken() {
   if (eof())
     return;
   if (RootTokenInitialized) {
+    assert(LastInCurrentLine->Children.empty());
     LastInCurrentLine->Children.push_back(FormatTok);
     LastInCurrentLine = &LastInCurrentLine->Children.back();
   } else {
-    Line.RootToken = FormatTok;
+    Line->RootToken = FormatTok;
     RootTokenInitialized = true;
-    LastInCurrentLine = &Line.RootToken;
+    LastInCurrentLine = &Line->RootToken;
+  }
+  if (MustBreakBeforeNextToken) {
+    LastInCurrentLine->MustBreakBefore = true;
+    MustBreakBeforeNextToken = false;
   }
   readToken();
 }
 
 void UnwrappedLineParser::readToken() {
   FormatTok = Tokens->getNextToken();
-  while (!Line.InPPDirective && FormatTok.Tok.is(tok::hash) &&
+  while (!Line->InPPDirective && FormatTok.Tok.is(tok::hash) &&
          ((FormatTok.NewlinesBefore > 0 && FormatTok.HasUnescapedNewline) ||
           FormatTok.IsFirst)) {
-    // FIXME: This is incorrect - the correct way is to create a
-    // data structure that will construct the parts around the preprocessor
-    // directive as a structured \c UnwrappedLine.
-    addUnwrappedLine();
+    UnwrappedLine* StoredLine = Line.take();
+    Line.reset(new UnwrappedLine(*StoredLine));
+    assert(LastInCurrentLine == NULL || LastInCurrentLine->Children.empty());
+    FormatToken *StoredLastInCurrentLine = LastInCurrentLine;
+    bool PreviousInitialized = RootTokenInitialized;
+    RootTokenInitialized = false;
+    LastInCurrentLine = NULL;
+
     parsePPDirective();
+
+    assert(!RootTokenInitialized);
+    Line.reset(StoredLine);
+    RootTokenInitialized = PreviousInitialized;
+    LastInCurrentLine = StoredLastInCurrentLine;
+    assert(LastInCurrentLine == NULL || LastInCurrentLine->Children.empty());
+    MustBreakBeforeNextToken = true;
   }
 }
 
diff --git a/lib/Format/UnwrappedLineParser.h b/lib/Format/UnwrappedLineParser.h
index 010bad8ece..500054fe9a 100644
--- a/lib/Format/UnwrappedLineParser.h
+++ b/lib/Format/UnwrappedLineParser.h
@@ -34,7 +34,7 @@ namespace format {
 struct FormatToken {
   FormatToken()
       : NewlinesBefore(0), HasUnescapedNewline(false), WhiteSpaceLength(0),
-        TokenLength(0), IsFirst(false) {
+        TokenLength(0), IsFirst(false), MustBreakBefore(false) {
   }
 
   /// \brief The \c Token.
@@ -68,6 +68,12 @@ struct FormatToken {
   /// \brief Indicates that this is the first token.
   bool IsFirst;
 
+  /// \brief Whether there must be a line break before this token.
+  ///
+  /// This happens for example when a preprocessor directive ended directly
+  /// before the token.
+  bool MustBreakBefore;
+
   // FIXME: We currently assume that there is exactly one token in this vector
   // except for the very last token that does not have any children.
   /// \brief All tokens that logically follow this token.
@@ -144,10 +150,11 @@ private:
   // FIXME: We are constantly running into bugs where Line.Level is incorrectly
   // subtracted from beyond 0. Introduce a method to subtract from Line.Level
   // and use that everywhere in the Parser.
-  UnwrappedLine Line;
+  llvm::OwningPtr<UnwrappedLine> Line;
   bool RootTokenInitialized;
   FormatToken *LastInCurrentLine;
   FormatToken FormatTok;
+  bool MustBreakBeforeNextToken;
 
   const FormatStyle &Style;
   FormatTokenSource *Tokens;
diff --git a/unittests/Format/FormatTest.cpp b/unittests/Format/FormatTest.cpp
index d4e5a51bc3..c21ff24834 100644
--- a/unittests/Format/FormatTest.cpp
+++ b/unittests/Format/FormatTest.cpp
@@ -46,17 +46,24 @@ protected:
   std::string messUp(llvm::StringRef Code) {
     std::string MessedUp(Code.str());
     bool InComment = false;
+    bool InPreprocessorDirective = false;
     bool JustReplacedNewline = false;
     for (unsigned i = 0, e = MessedUp.size() - 1; i != e; ++i) {
       if (MessedUp[i] == '/' && MessedUp[i + 1] == '/') {
         if (JustReplacedNewline)
           MessedUp[i - 1] = '\n';
         InComment = true;
+      } else if (MessedUp[i] == '#' && JustReplacedNewline) {
+        MessedUp[i - 1] = '\n';
+        InPreprocessorDirective = true;
       } else if (MessedUp[i] == '\\' && MessedUp[i + 1] == '\n') {
         MessedUp[i] = ' ';
+        MessedUp[i + 1] = ' ';
       } else if (MessedUp[i] == '\n') {
         if (InComment) {
           InComment = false;
+        } else if (InPreprocessorDirective) {
+          InPreprocessorDirective = false;
         } else {
           JustReplacedNewline = true;
           MessedUp[i] = ' ';
@@ -84,6 +91,14 @@ protected:
   }
 };
 
+TEST_F(FormatTest, MessUp) {
+  EXPECT_EQ("1 2 3", messUp("1 2 3"));
+  EXPECT_EQ("1 2 3\n", messUp("1\n2\n3\n"));
+  EXPECT_EQ("a\n//b\nc", messUp("a\n//b\nc"));
+  EXPECT_EQ("a\n#b\nc", messUp("a\n#b\nc"));
+  EXPECT_EQ("a\n#b  c  d\ne", messUp("a\n#b\\\nc\\\nd\ne"));
+}
+
 //===----------------------------------------------------------------------===//
 // Basic function tests.
 //===----------------------------------------------------------------------===//
@@ -545,7 +560,9 @@ TEST_F(FormatTest, LayoutSingleUnwrappedLineInMacro) {
 }
 
 TEST_F(FormatTest, MacroDefinitionInsideStatement) {
-  EXPECT_EQ("int x,\n#define A\ny;", format("int x,\n#define A\ny;"));
+  EXPECT_EQ("int x,\n"
+            "#define A\n"
+            "    y;", format("int x,\n#define A\ny;"));
 }
 
 TEST_F(FormatTest, HashInMacroDefinition) {
@@ -609,6 +626,23 @@ TEST_F(FormatTest, MixingPreprocessorDirectivesAndNormalCode) {
              "  aLooooooooooooooooooooooonPaaaaaaaaaaaaaaaaaaaaarmmmm);\n"));
 }
 
+TEST_F(FormatTest, LayoutStatementsAroundPreprocessorDirectives) {
+  EXPECT_EQ("int\n"
+            "#define A\n"
+            "    a;",
+            format("int\n#define A\na;"));
+  verifyFormat(
+      "functionCallTo(someOtherFunction(\n"
+      "    withSomeParameters, whichInSequence,\n"
+      "    areLongerThanALine(andAnotherCall,\n"
+      "#define A                                                           \\\n"
+      "  B\n"
+      "                       withMoreParamters,\n"
+      "                       whichStronglyInfluenceTheLayout),\n"
+      "    andMoreParameters),\n"
+      "               trailing);", getLLVMStyleWithColumns(69));
+}
+
 //===----------------------------------------------------------------------===//
 // Line break tests.
 //===----------------------------------------------------------------------===//
-- 
2.40.0