Refactor ContinuationIndenter's breakProtrudingToken logic.

author Manuel Klimek <klimek@google.com>

Tue, 14 Nov 2017 09:19:53 +0000 (09:19 +0000)

committer Manuel Klimek <klimek@google.com>

Tue, 14 Nov 2017 09:19:53 +0000 (09:19 +0000)
author Manuel Klimek <klimek@google.com>
Tue, 14 Nov 2017 09:19:53 +0000 (09:19 +0000)
committer Manuel Klimek <klimek@google.com>
Tue, 14 Nov 2017 09:19:53 +0000 (09:19 +0000)
diff --git a/lib/Format/BreakableToken.cpp b/lib/Format/BreakableToken.cpp

index ad1ec9f8ad2994764f9559ca3a99fff12df6d614..67d75fd696b9926c036eeabefa7bf240046a3471 100644 (file)
--- a/lib/Format/BreakableToken.cpp
+++ b/lib/Format/BreakableToken.cpp
@@ -749,6 +749,7 @@ BreakableLineCommentSection::BreakableLineCommentSection(
      Prefix.resize(Lines.size());
      OriginalPrefix.resize(Lines.size());
      for (size_t i = FirstLineIndex, e = Lines.size(); i < e; ++i) {
+      Lines[i] = Lines[i].ltrim(Blanks);
        // We need to trim the blanks in case this is not the first line in a
        // multiline comment. Then the indent is included in Lines[i].
        StringRef IndentPrefix =
diff --git a/lib/Format/ContinuationIndenter.cpp b/lib/Format/ContinuationIndenter.cpp

index 6797ca2259642300f7ef4c303055e65dd27acb3d..f171bb5f957f50e1d28800a20f90c438456ceea2 100644 (file)
--- a/lib/Format/ContinuationIndenter.cpp
+++ b/lib/Format/ContinuationIndenter.cpp
@@ -1008,8 +1008,8 @@ unsigned ContinuationIndenter::moveStateToNextToken(LineState &State,
  
    moveStatePastFakeLParens(State, Newline);
    moveStatePastScopeCloser(State);
-  bool CanBreakProtrudingToken = !State.Stack.back().NoLineBreak &&
-                                 !State.Stack.back().NoLineBreakInOperand;
+  bool AllowBreak = !State.Stack.back().NoLineBreak &&
+                    !State.Stack.back().NoLineBreakInOperand;
    moveStatePastScopeOpener(State, Newline);
    moveStatePastFakeRParens(State);
  
@@ -1023,13 +1023,9 @@ unsigned ContinuationIndenter::moveStateToNextToken(LineState &State,
  
    State.Column += Current.ColumnWidth;
    State.NextToken = State.NextToken->Next;
-  unsigned Penalty = 0;
-  if (CanBreakProtrudingToken)
-    Penalty = breakProtrudingToken(Current, State, DryRun);
-  if (State.Column > getColumnLimit(State)) {
-    unsigned ExcessCharacters = State.Column - getColumnLimit(State);
-    Penalty += Style.PenaltyExcessCharacter * ExcessCharacters;
-  }
+
+  unsigned Penalty =
+      handleEndOfLine(Current, State, DryRun, AllowBreak);
  
    if (Current.Role)
      Current.Role->formatFromToken(State, this, DryRun);
@@ -1282,8 +1278,10 @@ static unsigned getLastLineEndColumn(StringRef Text, unsigned StartColumn,
  }
  
  unsigned ContinuationIndenter::reformatRawStringLiteral(
-    const FormatToken &Current, unsigned StartColumn, LineState &State,
-    StringRef Delimiter, const FormatStyle &RawStringStyle, bool DryRun) {
+    const FormatToken &Current, LineState &State,
+    const FormatStyle &RawStringStyle, bool DryRun) {
+  unsigned StartColumn = State.Column - Current.ColumnWidth;
+  auto Delimiter = *getRawStringDelimiter(Current.TokenText);
    // The text of a raw string is between the leading 'R"delimiter(' and the
    // trailing 'delimiter)"'.
    unsigned PrefixSize = 3 + Delimiter.size();
@@ -1353,9 +1351,6 @@ unsigned ContinuationIndenter::reformatRawStringLiteral(
  
  unsigned ContinuationIndenter::addMultilineToken(const FormatToken &Current,
                                                   LineState &State) {
-  if (!Current.IsMultiline)
-    return 0;
-
    // Break before further function parameters on all levels.
    for (unsigned i = 0, e = State.Stack.size(); i != e; ++i)
      State.Stack[i].BreakBeforeParameter = true;
@@ -1370,42 +1365,56 @@ unsigned ContinuationIndenter::addMultilineToken(const FormatToken &Current,
    return 0;
  }
  
-unsigned ContinuationIndenter::breakProtrudingToken(const FormatToken &Current,
-                                                    LineState &State,
-                                                    bool DryRun) {
+unsigned ContinuationIndenter::handleEndOfLine(const FormatToken &Current,
+                                               LineState &State, bool DryRun,
+                                               bool AllowBreak) {
+  unsigned Penalty = 0;
    // Compute the raw string style to use in case this is a raw string literal
    // that can be reformatted.
-  llvm::Optional<StringRef> Delimiter = None;
-  llvm::Optional<FormatStyle> RawStringStyle = None;
-  if (Current.isStringLiteral())
-    Delimiter = getRawStringDelimiter(Current.TokenText);
-  if (Delimiter)
-    RawStringStyle = RawStringFormats.get(*Delimiter);
-
-  // Don't break multi-line tokens other than block comments and raw string
-  // literals. Instead, just update the state.
-  if (Current.isNot(TT_BlockComment) && !RawStringStyle && Current.IsMultiline)
-    return addMultilineToken(Current, State);
-
-  // Don't break implicit string literals or import statements.
-  if (Current.is(TT_ImplicitStringLiteral) ||
-      State.Line->Type == LT_ImportStatement)
-    return 0;
+  auto RawStringStyle = getRawStringStyle(Current, State);
+  if (RawStringStyle) {
+    Penalty = reformatRawStringLiteral(Current, State, *RawStringStyle, DryRun);
+  } else if (Current.IsMultiline && Current.isNot(TT_BlockComment)) {
+    // Don't break multi-line tokens other than block comments and raw string
+    // literals. Instead, just update the state.
+    Penalty = addMultilineToken(Current, State);
+  } else if (State.Line->Type != LT_ImportStatement) {
+    // We generally don't break import statements.
+    Penalty = breakProtrudingToken(Current, State, AllowBreak, DryRun);
+  }
+  if (State.Column > getColumnLimit(State)) {
+    unsigned ExcessCharacters = State.Column - getColumnLimit(State);
+    Penalty += Style.PenaltyExcessCharacter * ExcessCharacters;
+  }
+  return Penalty;
+}
  
-  if (!Current.isStringLiteral() && !Current.is(tok::comment))
-    return 0;
+llvm::Optional<FormatStyle>
+ContinuationIndenter::getRawStringStyle(const FormatToken &Current,
+                                        const LineState &State) {
+  if (!Current.isStringLiteral())
+    return None;
+  auto Delimiter = getRawStringDelimiter(Current.TokenText);
+  if (!Delimiter)
+    return None;
+  auto RawStringStyle = RawStringFormats.get(*Delimiter);
+  if (!RawStringStyle)
+    return None;
+  RawStringStyle->ColumnLimit = getColumnLimit(State);
+  return RawStringStyle;
+}
  
-  std::unique_ptr<BreakableToken> Token;
+std::unique_ptr<BreakableToken> ContinuationIndenter::createBreakableToken(
+    const FormatToken &Current, LineState &State, bool AllowBreak) {
    unsigned StartColumn = State.Column - Current.ColumnWidth;
-  unsigned ColumnLimit = getColumnLimit(State);
-
    if (Current.isStringLiteral()) {
      // FIXME: String literal breaking is currently disabled for Java and JS, as
      // it requires strings to be merged using "+" which we don't support.
      if (Style.Language == FormatStyle::LK_Java ||
          Style.Language == FormatStyle::LK_JavaScript ||
-        !Style.BreakStringLiterals)
-      return 0;
+        !Style.BreakStringLiterals ||
+        !AllowBreak)
+      return nullptr;
  
      // Don't break string literals inside preprocessor directives (except for
      // #define directives, as their contents are stored in separate lines and
@@ -1413,17 +1422,12 @@ unsigned ContinuationIndenter::breakProtrudingToken(const FormatToken &Current,
      // This way we avoid breaking code with line directives and unknown
      // preprocessor directives that contain long string literals.
      if (State.Line->Type == LT_PreprocessorDirective)
-      return 0;
+      return nullptr;
      // Exempts unterminated string literals from line breaking. The user will
      // likely want to terminate the string before any line breaking is done.
      if (Current.IsUnterminatedLiteral)
-      return 0;
+      return nullptr;
  
-    if (RawStringStyle) {
-      RawStringStyle->ColumnLimit = ColumnLimit;
-      return reformatRawStringLiteral(Current, StartColumn, State, *Delimiter,
-                                      *RawStringStyle, DryRun);
-    } 
      StringRef Text = Current.TokenText;
      StringRef Prefix;
      StringRef Postfix;
@@ -1437,36 +1441,48 @@ unsigned ContinuationIndenter::breakProtrudingToken(const FormatToken &Current,
            Text.startswith(Prefix = "u8\"") ||
            Text.startswith(Prefix = "L\""))) ||
          (Text.startswith(Prefix = "_T(\"") && Text.endswith(Postfix = "\")"))) {
-      Token.reset(new BreakableStringLiteral(Current, StartColumn, Prefix,
-                                             Postfix, State.Line->InPPDirective,
-                                             Encoding, Style));
-    } else {
-      return 0;
+      return llvm::make_unique<BreakableStringLiteral>(
+          Current, StartColumn, Prefix, Postfix, State.Line->InPPDirective,
+          Encoding, Style);
      }
    } else if (Current.is(TT_BlockComment)) {
      if (!Style.ReflowComments ||
          // If a comment token switches formatting, like
          // /* clang-format on */, we don't want to break it further,
          // but we may still want to adjust its indentation.
-        switchesFormatting(Current))
-      return addMultilineToken(Current, State);
-    Token.reset(new BreakableBlockComment(
+        switchesFormatting(Current)) {
+      return nullptr;
+    }
+    return llvm::make_unique<BreakableBlockComment>(
          Current, StartColumn, Current.OriginalColumn, !Current.Previous,
-        State.Line->InPPDirective, Encoding, Style));
+        State.Line->InPPDirective, Encoding, Style);
    } else if (Current.is(TT_LineComment) &&
               (Current.Previous == nullptr ||
                Current.Previous->isNot(TT_ImplicitStringLiteral))) {
      if (!Style.ReflowComments ||
          CommentPragmasRegex.match(Current.TokenText.substr(2)) ||
          switchesFormatting(Current))
-      return 0;
-    Token.reset(new BreakableLineCommentSection(
+      return nullptr;
+    return llvm::make_unique<BreakableLineCommentSection>(
          Current, StartColumn, Current.OriginalColumn, !Current.Previous,
-        /*InPPDirective=*/false, Encoding, Style));
+        /*InPPDirective=*/false, Encoding, Style);
+  }
+  return nullptr;
+}
+
+unsigned ContinuationIndenter::breakProtrudingToken(const FormatToken &Current,
+                                                    LineState &State,
+                                                    bool AllowBreak,
+                                                    bool DryRun) {
+  std::unique_ptr<BreakableToken> Token =
+      createBreakableToken(Current, State, AllowBreak);
+  if (!Token)
+    return 0;
+  unsigned ColumnLimit = getColumnLimit(State);
+  unsigned StartColumn = State.Column - Current.ColumnWidth;
+  if (Current.is(TT_LineComment)) {
      // We don't insert backslashes when breaking line comments.
      ColumnLimit = Style.ColumnLimit;
-  } else {
-    return 0;
    }
    if (Current.UnbreakableTailLength >= ColumnLimit)
      return 0;
diff --git a/lib/Format/ContinuationIndenter.h b/lib/Format/ContinuationIndenter.h

index 3bc9929b3df07a991c96f905349ed2035a283c52..5bdd555109a0489c6d0a79b301d8d67282c3ec42 100644 (file)
--- a/lib/Format/ContinuationIndenter.h
+++ b/lib/Format/ContinuationIndenter.h
@@ -29,6 +29,7 @@ class SourceManager;
  namespace format {
  
  class AnnotatedLine;
+class BreakableToken;
  struct FormatToken;
  struct LineState;
  struct ParenState;
@@ -105,11 +106,20 @@ private:
    /// 
    /// \returns An extra penalty induced by reformatting the token.
    unsigned reformatRawStringLiteral(const FormatToken &Current,
-                                    unsigned StartColumn, LineState &State,
-                                    StringRef Delimiter,
+                                    LineState &State,
                                      const FormatStyle &RawStringStyle,
                                      bool DryRun);
  
+  /// \brief If the current token is at the end of the current line, handle
+  /// the transition to the next line.
+  unsigned handleEndOfLine(const FormatToken &Current, LineState &State,
+                           bool DryRun, bool AllowBreak);
+
+  /// \brief If \p Current is a raw string that is configured to be reformatted,
+  /// return the style to be used.
+  llvm::Optional<FormatStyle> getRawStringStyle(const FormatToken &Current,
+                                                const LineState &State);
+
    /// \brief If the current token sticks out over the end of the line, break
    /// it if possible.
    ///
@@ -120,7 +130,13 @@ private:
    /// penalty for the column limit violation in the last line (and in single
    /// line tokens) is handled in \c addNextStateToQueue.
    unsigned breakProtrudingToken(const FormatToken &Current, LineState &State,
-                                bool DryRun);
+                                bool AllowBreak, bool DryRun);
+
+  /// \brief Returns the \c BreakableToken starting at \p Current, or nullptr
+  /// if the current token cannot be broken.
+  std::unique_ptr<BreakableToken>
+  createBreakableToken(const FormatToken &Current, LineState &State,
+                       bool AllowBreak);
  
    /// \brief Appends the next token to \p State and updates information
    /// necessary for indentation.
diff --git a/unittests/Format/FormatTest.cpp b/unittests/Format/FormatTest.cpp

index c61a56921afbebe1c24c9634ea367227989d4151..d9b40949e98860485bae1fc445899b4ef3cf3318 100644 (file)
--- a/unittests/Format/FormatTest.cpp
+++ b/unittests/Format/FormatTest.cpp
@@ -6319,7 +6319,8 @@ TEST_F(FormatTest, HandlesIncludeDirectives) {
                 "#include_next <test.h>"
                 "#include \"abc.h\" // this is included for ABC\n"
                 "#include \"some long include\" // with a comment\n"
-               "#include \"some very long include paaaaaaaaaaaaaaaaaaaaaaath\"",
+               "#include \"some very long include path\"\n"
+               "#include <some/very/long/include/path>\n",
                 getLLVMStyleWithColumns(35));
    EXPECT_EQ("#include \"a.h\"", format("#include  \"a.h\""));
    EXPECT_EQ("#include <a>", format("#include<a>"));
diff --git a/unittests/Format/FormatTestComments.cpp b/unittests/Format/FormatTestComments.cpp

index ae94ccaa61af4018393cb5ab0a8493c1cb383b0e..5467e73b55c550616d0424d5ca18f2243e827e74 100644 (file)
--- a/unittests/Format/FormatTestComments.cpp
+++ b/unittests/Format/FormatTestComments.cpp
@@ -686,8 +686,7 @@ TEST_F(FormatTestComments, SplitsLongCxxComments) {
    EXPECT_EQ("{\n"
              "  //\n"
              "  //\\\n"
-            "  // long 1 2 3 4\n"
-            "  // 5\n"
+            "  // long 1 2 3 4 5\n"
              "}",
              format("{\n"
                     "  //\n"
@@ -695,6 +694,18 @@ TEST_F(FormatTestComments, SplitsLongCxxComments) {
                     "  // long 1 2 3 4 5\n"
                     "}",
                     getLLVMStyleWithColumns(20)));
+  EXPECT_EQ("{\n"
+            "  //\n"
+            "  //\\\n"
+            "  // long 1 2 3 4 5\n"
+            "  // 6\n"
+            "}",
+            format("{\n"
+                   "  //\n"
+                   "  //\\\n"
+                   "  // long 1 2 3 4 5 6\n"
+                   "}",
+                   getLLVMStyleWithColumns(20)));
  }
  
  TEST_F(FormatTestComments, PreservesHangingIndentInCxxComments) {
author	Manuel Klimek <klimek@google.com>
	Tue, 14 Nov 2017 09:19:53 +0000 (09:19 +0000)
committer	Manuel Klimek <klimek@google.com>
	Tue, 14 Nov 2017 09:19:53 +0000 (09:19 +0000)
lib/Format/BreakableToken.cpp		patch \| blob \| history
lib/Format/ContinuationIndenter.cpp		patch \| blob \| history
lib/Format/ContinuationIndenter.h		patch \| blob \| history
unittests/Format/FormatTest.cpp		patch \| blob \| history
unittests/Format/FormatTestComments.cpp		patch \| blob \| history