From: Manuel Klimek <klimek@google.com>
Date: Tue, 22 Jan 2013 16:31:55 +0000 (+0000)
Subject: Implements more principled comment parsing.
X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=86721d2a4610ac0a4b162952ec409df1fe397d58;p=clang

Implements more principled comment parsing.

Changing nextToken() in the UnwrappedLineParser to get the next
non-comment token. This allows us to correctly layout a whole class of
snippets, like:

if /* */(/* */ a /* */) /* */
  f() /* */; /* */
else /* */
  g();

Fixes a bug in the formatter where we would assume there is a previous
non-comment token.
Also adds the indent level of an unwrapped line to the debug output in
the parser.

git-svn-id: https://llvm.org/svn/llvm-project/cfe/trunk@173168 91177308-0d34-0410-b5e6-96231b3b80d8
---

diff --git a/lib/Format/Format.cpp b/lib/Format/Format.cpp
index 08b0d97983..1405fc23e1 100644
--- a/lib/Format/Format.cpp
+++ b/lib/Format/Format.cpp
@@ -586,7 +586,8 @@ private:
       if (Previous.is(tok::l_paren) || Previous.is(tok::l_brace) ||
           State.NextToken->Parent->Type == TT_TemplateOpener)
         State.Stack[ParenLevel].Indent = State.Column + Spaces;
-      if (Current.getPreviousNoneComment()->is(tok::comma) &&
+      if (Current.getPreviousNoneComment() != NULL &&
+          Current.getPreviousNoneComment()->is(tok::comma) &&
           Current.isNot(tok::comment))
         State.Stack[ParenLevel].HasMultiParameterLine = true;
 
@@ -1639,6 +1640,7 @@ public:
     // FIXME: Add a more explicit test.
     unsigned i = 0;
     while (i + 1 < Text.size() && Text[i] == '\\' && Text[i + 1] == '\n') {
+      // FIXME: ++FormatTok.NewlinesBefore is missing...
       FormatTok.WhiteSpaceLength += 2;
       FormatTok.TokenLength -= 2;
       i += 2;
diff --git a/lib/Format/UnwrappedLineParser.cpp b/lib/Format/UnwrappedLineParser.cpp
index 2d6c5cf070..53fa2a32df 100644
--- a/lib/Format/UnwrappedLineParser.cpp
+++ b/lib/Format/UnwrappedLineParser.cpp
@@ -130,6 +130,7 @@ bool UnwrappedLineParser::parse() {
 bool UnwrappedLineParser::parseFile() {
   bool Error = parseLevel(/*HasOpeningBrace=*/false);
   // Make sure to format the remaining tokens.
+  flushComments(true);
   addUnwrappedLine();
   return Error;
 }
@@ -174,12 +175,14 @@ bool UnwrappedLineParser::parseBlock(unsigned AddLevels) {
 
   Line->Level += AddLevels;
   parseLevel(/*HasOpeningBrace=*/true);
-  Line->Level -= AddLevels;
 
-  if (!FormatTok.Tok.is(tok::r_brace))
+  if (!FormatTok.Tok.is(tok::r_brace)) {
+    Line->Level -= AddLevels;
     return true;
+  }
 
   nextToken();  // Munch the closing brace.
+  Line->Level -= AddLevels;
   return false;
 }
 
@@ -232,18 +235,8 @@ void UnwrappedLineParser::parsePPUnknown() {
   addUnwrappedLine();
 }
 
-void UnwrappedLineParser::parseComments() {
-  // Consume leading line comments, e.g. for branches without compounds.
-  while (FormatTok.Tok.is(tok::comment)) {
-    nextToken();
-    addUnwrappedLine();
-  }
-}
-
 void UnwrappedLineParser::parseStructuralElement() {
   assert(!FormatTok.Tok.is(tok::l_brace));
-  parseComments();
-
   int TokenNumber = 0;
   switch (FormatTok.Tok.getKind()) {
   case tok::at:
@@ -598,11 +591,6 @@ void UnwrappedLineParser::parseEnum() {
     ++Line->Level;
     do {
       switch (FormatTok.Tok.getKind()) {
-      case tok::comment:
-        // FIXME: Handle comments centrally, instead of special casing
-        // them everywhere.
-        parseComments();
-        break;
       case tok::l_paren:
         parseParens();
         break;
@@ -731,13 +719,8 @@ void UnwrappedLineParser::parseObjCProtocol() {
 void UnwrappedLineParser::addUnwrappedLine() {
   if (Line->Tokens.empty())
     return;
-  // Consume trailing comments.
-  while (!eof() && FormatTok.NewlinesBefore == 0 &&
-         FormatTok.Tok.is(tok::comment)) {
-    nextToken();
-  }
   DEBUG({
-    llvm::dbgs() << "Line: ";
+    llvm::dbgs() << "Line(" << Line->Level << "): ";
     for (std::list<FormatToken>::iterator I = Line->Tokens.begin(),
                                           E = Line->Tokens.end();
          I != E; ++I) {
@@ -763,28 +746,63 @@ bool UnwrappedLineParser::eof() const {
   return FormatTok.Tok.is(tok::eof);
 }
 
+void UnwrappedLineParser::flushComments(bool NewlineBeforeNext) {
+  bool JustComments = Line->Tokens.empty();
+  for (SmallVectorImpl<FormatToken>::const_iterator
+           I = CommentsBeforeNextToken.begin(),
+           E = CommentsBeforeNextToken.end();
+       I != E; ++I) {
+    if (I->HasUnescapedNewline && JustComments) {
+      addUnwrappedLine();
+    }
+    pushToken(*I);
+  }
+  if (NewlineBeforeNext && JustComments) {
+    addUnwrappedLine();
+  }
+  CommentsBeforeNextToken.clear();
+}
+
 void UnwrappedLineParser::nextToken() {
   if (eof())
     return;
-  Line->Tokens.push_back(FormatTok);
-  if (MustBreakBeforeNextToken) {
-    Line->Tokens.back().MustBreakBefore = true;
-    MustBreakBeforeNextToken = false;
-  }
+  flushComments(FormatTok.HasUnescapedNewline);
+  pushToken(FormatTok);
   readToken();
 }
 
 void UnwrappedLineParser::readToken() {
-  FormatTok = Tokens->getNextToken();
-  while (!Line->InPPDirective && FormatTok.Tok.is(tok::hash) &&
-         ((FormatTok.NewlinesBefore > 0 && FormatTok.HasUnescapedNewline) ||
-          FormatTok.IsFirst)) {
-    // If there is an unfinished unwrapped line, we flush the preprocessor
-    // directives only after that unwrapped line was finished later.
-    bool SwitchToPreprocessorLines = !Line->Tokens.empty() &&
-                                     CurrentLines == &Lines;
-    ScopedLineState BlockState(*this, SwitchToPreprocessorLines);
-    parsePPDirective();
+  bool CommentsInCurrentLine = true;
+  do {
+    FormatTok = Tokens->getNextToken();
+    while (!Line->InPPDirective && FormatTok.Tok.is(tok::hash) &&
+           ((FormatTok.NewlinesBefore > 0 && FormatTok.HasUnescapedNewline) ||
+            FormatTok.IsFirst)) {
+      // If there is an unfinished unwrapped line, we flush the preprocessor
+      // directives only after that unwrapped line was finished later.
+      bool SwitchToPreprocessorLines = !Line->Tokens.empty() &&
+                                       CurrentLines == &Lines;
+      ScopedLineState BlockState(*this, SwitchToPreprocessorLines);
+      parsePPDirective();
+    }
+    if (!FormatTok.Tok.is(tok::comment))
+      return;
+    if (FormatTok.HasUnescapedNewline || FormatTok.IsFirst) {
+      CommentsInCurrentLine = false;
+    }
+    if (CommentsInCurrentLine) {
+      pushToken(FormatTok);
+    } else {
+      CommentsBeforeNextToken.push_back(FormatTok);
+    }
+  } while (!eof());
+}
+
+void UnwrappedLineParser::pushToken(const FormatToken &Tok) {
+  Line->Tokens.push_back(Tok);
+  if (MustBreakBeforeNextToken) {
+    Line->Tokens.back().MustBreakBefore = true;
+    MustBreakBeforeNextToken = false;
   }
 }
 
diff --git a/lib/Format/UnwrappedLineParser.h b/lib/Format/UnwrappedLineParser.h
index 47148ef66d..f48594d592 100644
--- a/lib/Format/UnwrappedLineParser.h
+++ b/lib/Format/UnwrappedLineParser.h
@@ -128,7 +128,6 @@ private:
   void parsePPDirective();
   void parsePPDefine();
   void parsePPUnknown();
-  void parseComments();
   void parseStructuralElement();
   void parseBracedList();
   void parseReturn();
@@ -151,11 +150,19 @@ private:
   bool eof() const;
   void nextToken();
   void readToken();
+  void flushComments(bool NewlineBeforeNext);
+  void pushToken(const FormatToken &Tok);
 
   // FIXME: We are constantly running into bugs where Line.Level is incorrectly
   // subtracted from beyond 0. Introduce a method to subtract from Line.Level
   // and use that everywhere in the Parser.
   OwningPtr<UnwrappedLine> Line;
+
+  // Comments are sorted into unwrapped lines by whether they are in the same
+  // line as the previous token, or not. If not, they belong to the next token.
+  // Since the next token might already be in a new unwrapped line, we need to
+  // store the comments belonging to that token.
+  SmallVector<FormatToken, 1> CommentsBeforeNextToken;
   FormatToken FormatTok;
   bool MustBreakBeforeNextToken;
 
diff --git a/unittests/Format/FormatTest.cpp b/unittests/Format/FormatTest.cpp
index 201a08bb5f..b0f0fdba36 100644
--- a/unittests/Format/FormatTest.cpp
+++ b/unittests/Format/FormatTest.cpp
@@ -1625,6 +1625,47 @@ TEST_F(FormatTest, MergeHandlingInTheFaceOfPreprocessorDirectives) {
                "  if (true) continue;", ShortMergedIf);
 }
 
+TEST_F(FormatTest, BlockCommentsInControlLoops) {
+  verifyFormat("if (0) /* a comment in a strange place */ {\n"
+               "  f();\n"
+               "}");
+  verifyFormat("if (0) /* a comment in a strange place */ {\n"
+               "  f();\n"
+               "} /* another comment */ else /* comment #3 */ {\n"
+               "  g();\n"
+               "}");
+  verifyFormat("while (0) /* a comment in a strange place */ {\n"
+               "  f();\n"
+               "}");
+  verifyFormat("for (;;) /* a comment in a strange place */ {\n"
+               "  f();\n"
+               "}");
+  verifyFormat("do /* a comment in a strange place */ {\n"
+               "  f();\n"
+               "} /* another comment */ while (0);");
+}
+
+TEST_F(FormatTest, BlockComments) {
+  EXPECT_EQ("/* */ /* */ /* */\n/* */ /* */ /* */",
+            format("/* *//* */  /* */\n/* *//* */  /* */"));
+  EXPECT_EQ("/* */ a /* */ b;",
+            format("  /* */  a/* */  b;"));
+  EXPECT_EQ("#define A /*   */\\\n"
+            "  b\n"
+            "/* */\n"
+            "someCall(\n"
+            "    parameter);",
+            format("#define A /*   */ b\n"
+                   "/* */\n"
+                   "someCall(parameter);", getLLVMStyleWithColumns(15)));
+
+  EXPECT_EQ("#define A\n"
+            "/* */ someCall(\n"
+            "    parameter);",
+            format("#define A\n"
+                   "/* */someCall(parameter);", getLLVMStyleWithColumns(15)));
+}
+
 //===----------------------------------------------------------------------===//
 // Objective-C tests.
 //===----------------------------------------------------------------------===//