Expand parsing of braced init lists.

author Manuel Klimek <klimek@google.com>

Thu, 23 May 2013 09:41:43 +0000 (09:41 +0000)

committer Manuel Klimek <klimek@google.com>

Thu, 23 May 2013 09:41:43 +0000 (09:41 +0000)
author Manuel Klimek <klimek@google.com>
Thu, 23 May 2013 09:41:43 +0000 (09:41 +0000)
committer Manuel Klimek <klimek@google.com>
Thu, 23 May 2013 09:41:43 +0000 (09:41 +0000)
diff --git a/lib/Format/UnwrappedLineParser.cpp b/lib/Format/UnwrappedLineParser.cpp

index 4138bb98aed99539dc27498a601ea636de64a926..9c33e4f484ee9c1a2f9e737761421472925d7e74 100644 (file)
--- a/lib/Format/UnwrappedLineParser.cpp
+++ b/lib/Format/UnwrappedLineParser.cpp
@@ -72,6 +72,15 @@ public:
      return Token;
    }
  
+  virtual unsigned getPosition() {
+    return PreviousTokenSource->getPosition();
+  }
+
+  virtual FormatToken setPosition(unsigned Position) {
+    Token = PreviousTokenSource->setPosition(Position);
+    return Token;
+  }
+
  private:
    bool eof() { return Token.HasUnescapedNewline; }
  
@@ -124,15 +133,49 @@ private:
    UnwrappedLine *PreBlockLine;
  };
  
+class IndexedTokenSource : public FormatTokenSource {
+public:
+  IndexedTokenSource(ArrayRef<FormatToken> Tokens)
+      : Tokens(Tokens), Position(-1) {}
+
+  virtual FormatToken getNextToken() {
+    ++Position;
+    return Tokens[Position];
+  }
+
+  virtual unsigned getPosition() {
+    assert(Position >= 0);
+    return Position;
+  }
+
+  virtual FormatToken setPosition(unsigned P) {
+    Position = P;
+    return Tokens[Position];
+  }
+
+private:
+  ArrayRef<FormatToken> Tokens;
+  int Position;
+};
+
  UnwrappedLineParser::UnwrappedLineParser(const FormatStyle &Style,
                                           FormatTokenSource &Tokens,
                                           UnwrappedLineConsumer &Callback)
      : Line(new UnwrappedLine), MustBreakBeforeNextToken(false),
        CurrentLines(&Lines), StructuralError(false), Style(Style),
-      Tokens(&Tokens), Callback(Callback) {}
+      Tokens(NULL), Callback(Callback) {
+  FormatToken Tok;
+  do {
+    Tok = Tokens.getNextToken();
+    AllTokens.push_back(Tok);
+  } while (Tok.Tok.isNot(tok::eof));
+  LBraces.resize(AllTokens.size(), BS_Unknown);
+}
  
  bool UnwrappedLineParser::parse() {
    DEBUG(llvm::dbgs() << "----\n");
+  IndexedTokenSource TokenSource(AllTokens);
+  Tokens = &TokenSource;
    readToken();
    parseFile();
    for (std::vector<UnwrappedLine>::iterator I = Lines.begin(), E = Lines.end();
@@ -183,6 +226,68 @@ void UnwrappedLineParser::parseLevel(bool HasOpeningBrace) {
    } while (!eof());
  }
  
+void UnwrappedLineParser::calculateBraceTypes() {
+  // We'll parse forward through the tokens until we hit
+  // a closing brace or eof - note that getNextToken() will
+  // parse macros, so this will magically work inside macro
+  // definitions, too.
+  unsigned StoredPosition = Tokens->getPosition();
+  unsigned Position = StoredPosition;
+  FormatToken Tok = FormatTok;
+  // Keep a stack of positions of lbrace tokens. We will
+  // update information about whether an lbrace starts a
+  // braced init list or a different block during the loop.
+  SmallVector<unsigned, 8> LBraceStack;
+  assert(Tok.Tok.is(tok::l_brace));
+  do {
+    FormatToken NextTok = Tokens->getNextToken();
+    switch (Tok.Tok.getKind()) {
+    case tok::l_brace:
+      LBraceStack.push_back(Position);
+      break;
+    case tok::r_brace:
+      if (!LBraceStack.empty()) {
+        if (LBraces[LBraceStack.back()] == BS_Unknown) {
+          // If there is a comma, semicolon or right paren after the closing
+          // brace, we assume this is a braced initializer list.
+
+          // FIXME: Note that this currently works only because we do not
+          // use the brace information while inside a braced init list.
+          // Thus, if the parent is a braced init list, we treat all brace
+          // blocks inside it as braced init lists. That works well enough
+          // for now, but we will need to fix it to correctly handle lambdas.
+          if (NextTok.Tok.is(tok::comma) || NextTok.Tok.is(tok::semi) ||
+              NextTok.Tok.is(tok::r_paren))
+            LBraces[LBraceStack.back()] = BS_BracedInit;
+          else
+            LBraces[LBraceStack.back()] = BS_Block;
+        }
+        LBraceStack.pop_back();
+      }
+      break;
+    case tok::semi:
+    case tok::kw_if:
+    case tok::kw_while:
+    case tok::kw_for:
+    case tok::kw_switch:
+    case tok::kw_try:
+      if (!LBraceStack.empty()) 
+        LBraces[LBraceStack.back()] = BS_Block;
+      break;
+    default:
+      break;
+    }
+    Tok = NextTok;
+    ++Position;
+  } while (Tok.Tok.isNot(tok::eof));
+  // Assume other blocks for all unclosed opening braces.
+  for (unsigned i = 0, e = LBraceStack.size(); i != e; ++i) {
+    if (LBraces[LBraceStack[i]] == BS_Unknown)
+      LBraces[LBraceStack[i]] = BS_Block;
+  }
+  FormatTok = Tokens->setPosition(StoredPosition);
+}
+
  void UnwrappedLineParser::parseBlock(bool MustBeDeclaration,
                                       unsigned AddLevels) {
    assert(FormatTok.Tok.is(tok::l_brace) && "'{' expected");
@@ -394,17 +499,21 @@ void UnwrappedLineParser::parseStructuralElement() {
        parseParens();
        break;
      case tok::l_brace:
-      // A block outside of parentheses must be the last part of a
-      // structural element.
-      // FIXME: Figure out cases where this is not true, and add projections for
-      // them (the one we know is missing are lambdas).
-      if (Style.BreakBeforeBraces == FormatStyle::BS_Linux ||
-          Style.BreakBeforeBraces == FormatStyle::BS_Stroustrup)
+      if (!tryToParseBracedList()) {
+        // A block outside of parentheses must be the last part of a
+        // structural element.
+        // FIXME: Figure out cases where this is not true, and add projections
+        // for them (the one we know is missing are lambdas).
+        if (Style.BreakBeforeBraces == FormatStyle::BS_Linux ||
+            Style.BreakBeforeBraces == FormatStyle::BS_Stroustrup)
+          addUnwrappedLine();
+        parseBlock(/*MustBeDeclaration=*/ false);
          addUnwrappedLine();
-
-      parseBlock(/*MustBeDeclaration=*/ false);
-      addUnwrappedLine();
-      return;
+        return;
+      }
+      // Otherwise this was a braced init list, and the structural
+      // element continues.
+      break;
      case tok::identifier:
        nextToken();
        if (Line->Tokens.size() == 1) {
@@ -436,6 +545,16 @@ void UnwrappedLineParser::parseStructuralElement() {
    } while (!eof());
  }
  
+bool UnwrappedLineParser::tryToParseBracedList() {
+  if (LBraces[Tokens->getPosition()] == BS_Unknown)
+    calculateBraceTypes();
+  assert(LBraces[Tokens->getPosition()] != BS_Unknown);
+  if (LBraces[Tokens->getPosition()] == BS_Block)
+    return false;
+  parseBracedList();
+  return true;
+}
+
  void UnwrappedLineParser::parseBracedList() {
    nextToken();
  
@@ -517,13 +636,15 @@ void UnwrappedLineParser::parseParens() {
        nextToken();
        return;
      case tok::l_brace: {
-      nextToken();
-      ScopedLineState LineState(*this);
-      ScopedDeclarationState DeclarationState(*Line, DeclarationScopeStack,
-                                              /*MustBeDeclaration=*/ false);
-      Line->Level += 1;
-      parseLevel(/*HasOpeningBrace=*/ true);
-      Line->Level -= 1;
+      if (!tryToParseBracedList()) {
+        nextToken();
+        ScopedLineState LineState(*this);
+        ScopedDeclarationState DeclarationState(*Line, DeclarationScopeStack,
+                                                /*MustBeDeclaration=*/ false);
+        Line->Level += 1;
+        parseLevel(/*HasOpeningBrace=*/ true);
+        Line->Level -= 1;
+      }
        break;
      }
      case tok::at:
diff --git a/lib/Format/UnwrappedLineParser.h b/lib/Format/UnwrappedLineParser.h

index 82e93bef4809f06576484462831b933d5a6281a9..842b1086460a6475fe786dac596afc793541a04a 100644 (file)
--- a/lib/Format/UnwrappedLineParser.h
+++ b/lib/Format/UnwrappedLineParser.h
@@ -122,6 +122,12 @@ public:
    virtual ~FormatTokenSource() {
    }
    virtual FormatToken getNextToken() = 0;
+
+  // FIXME: This interface will become an implementation detail of
+  // the UnwrappedLineParser once we switch to generating all tokens
+  // up front.
+  virtual unsigned getPosition() { return 0; }
+  virtual FormatToken setPosition(unsigned Position) { assert(false); }
  };
  
  class UnwrappedLineParser {
@@ -140,6 +146,7 @@ private:
    void parsePPDefine();
    void parsePPUnknown();
    void parseStructuralElement();
+  bool tryToParseBracedList();
    void parseBracedList();
    void parseReturn();
    void parseParens();
@@ -163,6 +170,14 @@ private:
    void readToken();
    void flushComments(bool NewlineBeforeNext);
    void pushToken(const FormatToken &Tok);
+  void calculateBraceTypes();
+
+  // Represents what type of block a left brace opens.
+  enum LBraceState {
+    BS_Unknown,
+    BS_Block,
+    BS_BracedInit
+  };
  
    // FIXME: We are constantly running into bugs where Line.Level is incorrectly
    // subtracted from beyond 0. Introduce a method to subtract from Line.Level
@@ -203,6 +218,16 @@ private:
    FormatTokenSource *Tokens;
    UnwrappedLineConsumer &Callback;
  
+  // FIXME: This is a temporary measure until we have reworked the ownership
+  // of the format tokens. The goal is to have the actual tokens created and
+  // owned outside of and handed into the UnwrappedLineParser.
+  SmallVector<FormatToken, 16> AllTokens;
+
+  // FIXME: Currently we cannot store attributes with tokens, as we treat
+  // them as read-only; thus, we now store the brace state indexed by the
+  // position of the token in the stream (see \c AllTokens).
+  SmallVector<LBraceState, 16> LBraces;
+
    friend class ScopedLineState;
  };
  
diff --git a/unittests/Format/FormatTest.cpp b/unittests/Format/FormatTest.cpp

index 6530c30d7d7ce4977272256956242232fcb4421b..c15e0622b46c64ebb979313461a4d1111076cce3 100644 (file)
--- a/unittests/Format/FormatTest.cpp
+++ b/unittests/Format/FormatTest.cpp
@@ -2706,7 +2706,7 @@ TEST_F(FormatTest, UnderstandsUsesOfStarAndAmp) {
    verifyIndependentOfContext("Type **A = static_cast<Type **>(P);");
    verifyGoogleFormat("Type** A = static_cast<Type**>(P);");
    // FIXME: The newline is wrong.
-  verifyFormat("auto a = [](int **&, int ***) {}\n;");
+  verifyFormat("auto a = [](int **&, int ***) {};");
  
    verifyIndependentOfContext("InvalidRegions[*R] = 0;");
  
@@ -3100,6 +3100,14 @@ TEST_F(FormatTest, LayoutBraceInitializersInReturnStatement) {
    verifyFormat("return (a)(b) { 1, 2, 3 };");
  }
  
+TEST_F(FormatTest, LayoutCxx11ConstructorBraceInitializers) {
+    verifyFormat("vector<int> x { 1, 2, 3, 4 };");
+    verifyFormat("vector<T> x { {}, {}, {}, {} };");
+    verifyFormat("f({ 1, 2 });");
+    verifyFormat("auto v = Foo { 1 };");
+    verifyFormat("f({ 1, 2 }, { { 2, 3 }, { 4, 5 } }, c, { d });");
+}
+
  TEST_F(FormatTest, LayoutTokensFollowingBlockInParentheses) {
    // FIXME: This is bad, find a better and more generic solution.
    verifyFormat(
@@ -3859,8 +3867,7 @@ TEST_F(FormatTest, ObjCLiterals) {
                 "{ @2 : @1 }\n"
                 "}");
    verifyFormat("@{ @\"one\" : @\n"
-               "{ @2 : @1 }\n"
-               ",\n"
+               "{ @2 : @1 },\n"
                 "}");
  
    verifyFormat("@{ 1 > 2 ? @\"one\" : @\"two\" : 1 > 2 ? @1 : @2 }");
author	Manuel Klimek <klimek@google.com>
	Thu, 23 May 2013 09:41:43 +0000 (09:41 +0000)
committer	Manuel Klimek <klimek@google.com>
	Thu, 23 May 2013 09:41:43 +0000 (09:41 +0000)
lib/Format/UnwrappedLineParser.cpp		patch \| blob \| history
lib/Format/UnwrappedLineParser.h		patch \| blob \| history
unittests/Format/FormatTest.cpp		patch \| blob \| history