From: Manuel Klimek Date: Tue, 22 Jan 2013 16:31:55 +0000 (+0000) Subject: Implements more principled comment parsing. X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=86721d2a4610ac0a4b162952ec409df1fe397d58;p=clang Implements more principled comment parsing. Changing nextToken() in the UnwrappedLineParser to get the next non-comment token. This allows us to correctly layout a whole class of snippets, like: if /* */(/* */ a /* */) /* */ f() /* */; /* */ else /* */ g(); Fixes a bug in the formatter where we would assume there is a previous non-comment token. Also adds the indent level of an unwrapped line to the debug output in the parser. git-svn-id: https://llvm.org/svn/llvm-project/cfe/trunk@173168 91177308-0d34-0410-b5e6-96231b3b80d8 --- diff --git a/lib/Format/Format.cpp b/lib/Format/Format.cpp index 08b0d97983..1405fc23e1 100644 --- a/lib/Format/Format.cpp +++ b/lib/Format/Format.cpp @@ -586,7 +586,8 @@ private: if (Previous.is(tok::l_paren) || Previous.is(tok::l_brace) || State.NextToken->Parent->Type == TT_TemplateOpener) State.Stack[ParenLevel].Indent = State.Column + Spaces; - if (Current.getPreviousNoneComment()->is(tok::comma) && + if (Current.getPreviousNoneComment() != NULL && + Current.getPreviousNoneComment()->is(tok::comma) && Current.isNot(tok::comment)) State.Stack[ParenLevel].HasMultiParameterLine = true; @@ -1639,6 +1640,7 @@ public: // FIXME: Add a more explicit test. unsigned i = 0; while (i + 1 < Text.size() && Text[i] == '\\' && Text[i + 1] == '\n') { + // FIXME: ++FormatTok.NewlinesBefore is missing... FormatTok.WhiteSpaceLength += 2; FormatTok.TokenLength -= 2; i += 2; diff --git a/lib/Format/UnwrappedLineParser.cpp b/lib/Format/UnwrappedLineParser.cpp index 2d6c5cf070..53fa2a32df 100644 --- a/lib/Format/UnwrappedLineParser.cpp +++ b/lib/Format/UnwrappedLineParser.cpp @@ -130,6 +130,7 @@ bool UnwrappedLineParser::parse() { bool UnwrappedLineParser::parseFile() { bool Error = parseLevel(/*HasOpeningBrace=*/false); // Make sure to format the remaining tokens. + flushComments(true); addUnwrappedLine(); return Error; } @@ -174,12 +175,14 @@ bool UnwrappedLineParser::parseBlock(unsigned AddLevels) { Line->Level += AddLevels; parseLevel(/*HasOpeningBrace=*/true); - Line->Level -= AddLevels; - if (!FormatTok.Tok.is(tok::r_brace)) + if (!FormatTok.Tok.is(tok::r_brace)) { + Line->Level -= AddLevels; return true; + } nextToken(); // Munch the closing brace. + Line->Level -= AddLevels; return false; } @@ -232,18 +235,8 @@ void UnwrappedLineParser::parsePPUnknown() { addUnwrappedLine(); } -void UnwrappedLineParser::parseComments() { - // Consume leading line comments, e.g. for branches without compounds. - while (FormatTok.Tok.is(tok::comment)) { - nextToken(); - addUnwrappedLine(); - } -} - void UnwrappedLineParser::parseStructuralElement() { assert(!FormatTok.Tok.is(tok::l_brace)); - parseComments(); - int TokenNumber = 0; switch (FormatTok.Tok.getKind()) { case tok::at: @@ -598,11 +591,6 @@ void UnwrappedLineParser::parseEnum() { ++Line->Level; do { switch (FormatTok.Tok.getKind()) { - case tok::comment: - // FIXME: Handle comments centrally, instead of special casing - // them everywhere. - parseComments(); - break; case tok::l_paren: parseParens(); break; @@ -731,13 +719,8 @@ void UnwrappedLineParser::parseObjCProtocol() { void UnwrappedLineParser::addUnwrappedLine() { if (Line->Tokens.empty()) return; - // Consume trailing comments. - while (!eof() && FormatTok.NewlinesBefore == 0 && - FormatTok.Tok.is(tok::comment)) { - nextToken(); - } DEBUG({ - llvm::dbgs() << "Line: "; + llvm::dbgs() << "Line(" << Line->Level << "): "; for (std::list::iterator I = Line->Tokens.begin(), E = Line->Tokens.end(); I != E; ++I) { @@ -763,28 +746,63 @@ bool UnwrappedLineParser::eof() const { return FormatTok.Tok.is(tok::eof); } +void UnwrappedLineParser::flushComments(bool NewlineBeforeNext) { + bool JustComments = Line->Tokens.empty(); + for (SmallVectorImpl::const_iterator + I = CommentsBeforeNextToken.begin(), + E = CommentsBeforeNextToken.end(); + I != E; ++I) { + if (I->HasUnescapedNewline && JustComments) { + addUnwrappedLine(); + } + pushToken(*I); + } + if (NewlineBeforeNext && JustComments) { + addUnwrappedLine(); + } + CommentsBeforeNextToken.clear(); +} + void UnwrappedLineParser::nextToken() { if (eof()) return; - Line->Tokens.push_back(FormatTok); - if (MustBreakBeforeNextToken) { - Line->Tokens.back().MustBreakBefore = true; - MustBreakBeforeNextToken = false; - } + flushComments(FormatTok.HasUnescapedNewline); + pushToken(FormatTok); readToken(); } void UnwrappedLineParser::readToken() { - FormatTok = Tokens->getNextToken(); - while (!Line->InPPDirective && FormatTok.Tok.is(tok::hash) && - ((FormatTok.NewlinesBefore > 0 && FormatTok.HasUnescapedNewline) || - FormatTok.IsFirst)) { - // If there is an unfinished unwrapped line, we flush the preprocessor - // directives only after that unwrapped line was finished later. - bool SwitchToPreprocessorLines = !Line->Tokens.empty() && - CurrentLines == &Lines; - ScopedLineState BlockState(*this, SwitchToPreprocessorLines); - parsePPDirective(); + bool CommentsInCurrentLine = true; + do { + FormatTok = Tokens->getNextToken(); + while (!Line->InPPDirective && FormatTok.Tok.is(tok::hash) && + ((FormatTok.NewlinesBefore > 0 && FormatTok.HasUnescapedNewline) || + FormatTok.IsFirst)) { + // If there is an unfinished unwrapped line, we flush the preprocessor + // directives only after that unwrapped line was finished later. + bool SwitchToPreprocessorLines = !Line->Tokens.empty() && + CurrentLines == &Lines; + ScopedLineState BlockState(*this, SwitchToPreprocessorLines); + parsePPDirective(); + } + if (!FormatTok.Tok.is(tok::comment)) + return; + if (FormatTok.HasUnescapedNewline || FormatTok.IsFirst) { + CommentsInCurrentLine = false; + } + if (CommentsInCurrentLine) { + pushToken(FormatTok); + } else { + CommentsBeforeNextToken.push_back(FormatTok); + } + } while (!eof()); +} + +void UnwrappedLineParser::pushToken(const FormatToken &Tok) { + Line->Tokens.push_back(Tok); + if (MustBreakBeforeNextToken) { + Line->Tokens.back().MustBreakBefore = true; + MustBreakBeforeNextToken = false; } } diff --git a/lib/Format/UnwrappedLineParser.h b/lib/Format/UnwrappedLineParser.h index 47148ef66d..f48594d592 100644 --- a/lib/Format/UnwrappedLineParser.h +++ b/lib/Format/UnwrappedLineParser.h @@ -128,7 +128,6 @@ private: void parsePPDirective(); void parsePPDefine(); void parsePPUnknown(); - void parseComments(); void parseStructuralElement(); void parseBracedList(); void parseReturn(); @@ -151,11 +150,19 @@ private: bool eof() const; void nextToken(); void readToken(); + void flushComments(bool NewlineBeforeNext); + void pushToken(const FormatToken &Tok); // FIXME: We are constantly running into bugs where Line.Level is incorrectly // subtracted from beyond 0. Introduce a method to subtract from Line.Level // and use that everywhere in the Parser. OwningPtr Line; + + // Comments are sorted into unwrapped lines by whether they are in the same + // line as the previous token, or not. If not, they belong to the next token. + // Since the next token might already be in a new unwrapped line, we need to + // store the comments belonging to that token. + SmallVector CommentsBeforeNextToken; FormatToken FormatTok; bool MustBreakBeforeNextToken; diff --git a/unittests/Format/FormatTest.cpp b/unittests/Format/FormatTest.cpp index 201a08bb5f..b0f0fdba36 100644 --- a/unittests/Format/FormatTest.cpp +++ b/unittests/Format/FormatTest.cpp @@ -1625,6 +1625,47 @@ TEST_F(FormatTest, MergeHandlingInTheFaceOfPreprocessorDirectives) { " if (true) continue;", ShortMergedIf); } +TEST_F(FormatTest, BlockCommentsInControlLoops) { + verifyFormat("if (0) /* a comment in a strange place */ {\n" + " f();\n" + "}"); + verifyFormat("if (0) /* a comment in a strange place */ {\n" + " f();\n" + "} /* another comment */ else /* comment #3 */ {\n" + " g();\n" + "}"); + verifyFormat("while (0) /* a comment in a strange place */ {\n" + " f();\n" + "}"); + verifyFormat("for (;;) /* a comment in a strange place */ {\n" + " f();\n" + "}"); + verifyFormat("do /* a comment in a strange place */ {\n" + " f();\n" + "} /* another comment */ while (0);"); +} + +TEST_F(FormatTest, BlockComments) { + EXPECT_EQ("/* */ /* */ /* */\n/* */ /* */ /* */", + format("/* *//* */ /* */\n/* *//* */ /* */")); + EXPECT_EQ("/* */ a /* */ b;", + format(" /* */ a/* */ b;")); + EXPECT_EQ("#define A /* */\\\n" + " b\n" + "/* */\n" + "someCall(\n" + " parameter);", + format("#define A /* */ b\n" + "/* */\n" + "someCall(parameter);", getLLVMStyleWithColumns(15))); + + EXPECT_EQ("#define A\n" + "/* */ someCall(\n" + " parameter);", + format("#define A\n" + "/* */someCall(parameter);", getLLVMStyleWithColumns(15))); +} + //===----------------------------------------------------------------------===// // Objective-C tests. //===----------------------------------------------------------------------===//