From: Manuel Klimek Date: Thu, 23 May 2013 09:41:43 +0000 (+0000) Subject: Expand parsing of braced init lists. X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=80829bd8c3161cb706e28f29ac17a871f5eaaa5b;p=clang Expand parsing of braced init lists. Allows formatting of C++11 braced init list constructs, like: vector<int> v { 1, 2, 3 }; f({ 1, 2 }); This involves some changes to how tokens are handled in the UnwrappedLineFormatter. Note that we have a plan to evolve the design of the token flow into one where we create all tokens up-front and then annotate them in the various layers (as we currently already have to create all tokens at once anyway, the current abstraction does not help). Thus, this introduces FIXMEs towards that goal. git-svn-id: https://llvm.org/svn/llvm-project/cfe/trunk@182568 91177308-0d34-0410-b5e6-96231b3b80d8 --- diff --git a/lib/Format/UnwrappedLineParser.cpp b/lib/Format/UnwrappedLineParser.cpp index 4138bb98ae..9c33e4f484 100644 --- a/lib/Format/UnwrappedLineParser.cpp +++ b/lib/Format/UnwrappedLineParser.cpp @@ -72,6 +72,15 @@ public: return Token; } + virtual unsigned getPosition() { + return PreviousTokenSource->getPosition(); + } + + virtual FormatToken setPosition(unsigned Position) { + Token = PreviousTokenSource->setPosition(Position); + return Token; + } + private: bool eof() { return Token.HasUnescapedNewline; } @@ -124,15 +133,49 @@ private: UnwrappedLine *PreBlockLine; }; +class IndexedTokenSource : public FormatTokenSource { +public: + IndexedTokenSource(ArrayRef Tokens) + : Tokens(Tokens), Position(-1) {} + + virtual FormatToken getNextToken() { + ++Position; + return Tokens[Position]; + } + + virtual unsigned getPosition() { + assert(Position >= 0); + return Position; + } + + virtual FormatToken setPosition(unsigned P) { + Position = P; + return Tokens[Position]; + } + +private: + ArrayRef Tokens; + int Position; +}; + UnwrappedLineParser::UnwrappedLineParser(const FormatStyle &Style, FormatTokenSource &Tokens, 
UnwrappedLineConsumer &Callback) : Line(new UnwrappedLine), MustBreakBeforeNextToken(false), CurrentLines(&Lines), StructuralError(false), Style(Style), - Tokens(&Tokens), Callback(Callback) {} + Tokens(NULL), Callback(Callback) { + FormatToken Tok; + do { + Tok = Tokens.getNextToken(); + AllTokens.push_back(Tok); + } while (Tok.Tok.isNot(tok::eof)); + LBraces.resize(AllTokens.size(), BS_Unknown); +} bool UnwrappedLineParser::parse() { DEBUG(llvm::dbgs() << "----\n"); + IndexedTokenSource TokenSource(AllTokens); + Tokens = &TokenSource; readToken(); parseFile(); for (std::vector::iterator I = Lines.begin(), E = Lines.end(); @@ -183,6 +226,68 @@ void UnwrappedLineParser::parseLevel(bool HasOpeningBrace) { } while (!eof()); } +void UnwrappedLineParser::calculateBraceTypes() { + // We'll parse forward through the tokens until we hit + // a closing brace or eof - note that getNextToken() will + // parse macros, so this will magically work inside macro + // definitions, too. + unsigned StoredPosition = Tokens->getPosition(); + unsigned Position = StoredPosition; + FormatToken Tok = FormatTok; + // Keep a stack of positions of lbrace tokens. We will + // update information about whether an lbrace starts a + // braced init list or a different block during the loop. + SmallVector LBraceStack; + assert(Tok.Tok.is(tok::l_brace)); + do { + FormatToken NextTok = Tokens->getNextToken(); + switch (Tok.Tok.getKind()) { + case tok::l_brace: + LBraceStack.push_back(Position); + break; + case tok::r_brace: + if (!LBraceStack.empty()) { + if (LBraces[LBraceStack.back()] == BS_Unknown) { + // If there is a comma, semicolon or right paren after the closing + // brace, we assume this is a braced initializer list. + + // FIXME: Note that this currently works only because we do not + // use the brace information while inside a braced init list. + // Thus, if the parent is a braced init list, we consider all + // brace blocks inside it braced init lists. 
That works well enough + // for now, but we will need to fix it to correctly handle lambdas. + if (NextTok.Tok.is(tok::comma) || NextTok.Tok.is(tok::semi) || + NextTok.Tok.is(tok::r_paren)) + LBraces[LBraceStack.back()] = BS_BracedInit; + else + LBraces[LBraceStack.back()] = BS_Block; + } + LBraceStack.pop_back(); + } + break; + case tok::semi: + case tok::kw_if: + case tok::kw_while: + case tok::kw_for: + case tok::kw_switch: + case tok::kw_try: + if (!LBraceStack.empty()) + LBraces[LBraceStack.back()] = BS_Block; + break; + default: + break; + } + Tok = NextTok; + ++Position; + } while (Tok.Tok.isNot(tok::eof)); + // Assume other blocks for all unclosed opening braces. + for (unsigned i = 0, e = LBraceStack.size(); i != e; ++i) { + if (LBraces[LBraceStack[i]] == BS_Unknown) + LBraces[LBraceStack[i]] = BS_Block; + } + FormatTok = Tokens->setPosition(StoredPosition); +} + void UnwrappedLineParser::parseBlock(bool MustBeDeclaration, unsigned AddLevels) { assert(FormatTok.Tok.is(tok::l_brace) && "'{' expected"); @@ -394,17 +499,21 @@ void UnwrappedLineParser::parseStructuralElement() { parseParens(); break; case tok::l_brace: - // A block outside of parentheses must be the last part of a - // structural element. - // FIXME: Figure out cases where this is not true, and add projections for - // them (the one we know is missing are lambdas). - if (Style.BreakBeforeBraces == FormatStyle::BS_Linux || - Style.BreakBeforeBraces == FormatStyle::BS_Stroustrup) + if (!tryToParseBracedList()) { + // A block outside of parentheses must be the last part of a + // structural element. + // FIXME: Figure out cases where this is not true, and add projections + // for them (the one we know is missing are lambdas). 
+ if (Style.BreakBeforeBraces == FormatStyle::BS_Linux || + Style.BreakBeforeBraces == FormatStyle::BS_Stroustrup) + addUnwrappedLine(); + parseBlock(/*MustBeDeclaration=*/ false); addUnwrappedLine(); - - parseBlock(/*MustBeDeclaration=*/ false); - addUnwrappedLine(); - return; + return; + } + // Otherwise this was a braced init list, and the structural + // element continues. + break; case tok::identifier: nextToken(); if (Line->Tokens.size() == 1) { @@ -436,6 +545,16 @@ void UnwrappedLineParser::parseStructuralElement() { } while (!eof()); } +bool UnwrappedLineParser::tryToParseBracedList() { + if (LBraces[Tokens->getPosition()] == BS_Unknown) + calculateBraceTypes(); + assert(LBraces[Tokens->getPosition()] != BS_Unknown); + if (LBraces[Tokens->getPosition()] == BS_Block) + return false; + parseBracedList(); + return true; +} + void UnwrappedLineParser::parseBracedList() { nextToken(); @@ -517,13 +636,15 @@ void UnwrappedLineParser::parseParens() { nextToken(); return; case tok::l_brace: { - nextToken(); - ScopedLineState LineState(*this); - ScopedDeclarationState DeclarationState(*Line, DeclarationScopeStack, - /*MustBeDeclaration=*/ false); - Line->Level += 1; - parseLevel(/*HasOpeningBrace=*/ true); - Line->Level -= 1; + if (!tryToParseBracedList()) { + nextToken(); + ScopedLineState LineState(*this); + ScopedDeclarationState DeclarationState(*Line, DeclarationScopeStack, + /*MustBeDeclaration=*/ false); + Line->Level += 1; + parseLevel(/*HasOpeningBrace=*/ true); + Line->Level -= 1; + } break; } case tok::at: diff --git a/lib/Format/UnwrappedLineParser.h b/lib/Format/UnwrappedLineParser.h index 82e93bef48..842b108646 100644 --- a/lib/Format/UnwrappedLineParser.h +++ b/lib/Format/UnwrappedLineParser.h @@ -122,6 +122,12 @@ public: virtual ~FormatTokenSource() { } virtual FormatToken getNextToken() = 0; + + // FIXME: This interface will become an implementation detail of + // the UnwrappedLineParser once we switch to generate all tokens + // up-front. 
+ virtual unsigned getPosition() { return 0; } + virtual FormatToken setPosition(unsigned Position) { assert(false); } }; class UnwrappedLineParser { @@ -140,6 +146,7 @@ private: void parsePPDefine(); void parsePPUnknown(); void parseStructuralElement(); + bool tryToParseBracedList(); void parseBracedList(); void parseReturn(); void parseParens(); @@ -163,6 +170,14 @@ private: void readToken(); void flushComments(bool NewlineBeforeNext); void pushToken(const FormatToken &Tok); + void calculateBraceTypes(); + + // Represents what type of block a left brace opens. + enum LBraceState { + BS_Unknown, + BS_Block, + BS_BracedInit + }; // FIXME: We are constantly running into bugs where Line.Level is incorrectly // subtracted from beyond 0. Introduce a method to subtract from Line.Level @@ -203,6 +218,16 @@ private: FormatTokenSource *Tokens; UnwrappedLineConsumer &Callback; + // FIXME: This is a temporary measure until we have reworked the ownership + // of the format tokens. The goal is to have the actual tokens created and + // owned outside of and handed into the UnwrappedLineParser. + SmallVector AllTokens; + + // FIXME: Currently we cannot store attributes with tokens, as we treat + // them as read-only; thus, we now store the brace state indexed by the + // position of the token in the stream (see \c AllTokens). + SmallVector LBraces; + friend class ScopedLineState; }; diff --git a/unittests/Format/FormatTest.cpp b/unittests/Format/FormatTest.cpp index 6530c30d7d..c15e0622b4 100644 --- a/unittests/Format/FormatTest.cpp +++ b/unittests/Format/FormatTest.cpp @@ -2706,7 +2706,7 @@ TEST_F(FormatTest, UnderstandsUsesOfStarAndAmp) { verifyIndependentOfContext("Type **A = static_cast(P);"); verifyGoogleFormat("Type** A = static_cast(P);"); // FIXME: The newline is wrong. 
- verifyFormat("auto a = [](int **&, int ***) {}\n;"); + verifyFormat("auto a = [](int **&, int ***) {};"); verifyIndependentOfContext("InvalidRegions[*R] = 0;"); @@ -3100,6 +3100,14 @@ TEST_F(FormatTest, LayoutBraceInitializersInReturnStatement) { verifyFormat("return (a)(b) { 1, 2, 3 };"); } +TEST_F(FormatTest, LayoutCxx11ConstructorBraceInitializers) { + verifyFormat("vector x { 1, 2, 3, 4 };"); + verifyFormat("vector x { {}, {}, {}, {} };"); + verifyFormat("f({ 1, 2 });"); + verifyFormat("auto v = Foo { 1 };"); + verifyFormat("f({ 1, 2 }, { { 2, 3 }, { 4, 5 } }, c, { d });"); +} + TEST_F(FormatTest, LayoutTokensFollowingBlockInParentheses) { // FIXME: This is bad, find a better and more generic solution. verifyFormat( @@ -3859,8 +3867,7 @@ TEST_F(FormatTest, ObjCLiterals) { "{ @2 : @1 }\n" "}"); verifyFormat("@{ @\"one\" : @\n" - "{ @2 : @1 }\n" - ",\n" + "{ @2 : @1 },\n" "}"); verifyFormat("@{ 1 > 2 ? @\"one\" : @\"two\" : 1 > 2 ? @1 : @2 }");