]> granicus.if.org Git - clang/commitdiff
Fixed calculation of penalty when breaking tokens.
authorAlexander Kornienko <alexfh@google.com>
Fri, 7 Jun 2013 16:02:52 +0000 (16:02 +0000)
committerAlexander Kornienko <alexfh@google.com>
Fri, 7 Jun 2013 16:02:52 +0000 (16:02 +0000)
Summary:
Introduced two new style parameters: PenaltyBreakComment and
PenaltyBreakString. Add penalty for each character of a breakable token beyond
the column limit (this relates mainly to comments, as they are broken only on
whitespace). Tuned PenaltyBreakComment to prefer comment breaking over breaking
inside most binary expressions.
Fixed a bug that prevented *, & and && from being considered TT_BinaryOperator
in the presense of adjacent comments.

Reviewers: klimek, djasper

Reviewed By: klimek

CC: cfe-commits
Differential Revision: http://llvm-reviews.chandlerc.com/D933

git-svn-id: https://llvm.org/svn/llvm-project/cfe/trunk@183530 91177308-0d34-0410-b5e6-96231b3b80d8

include/clang/Format/Format.h
lib/Format/BreakableToken.cpp
lib/Format/BreakableToken.h
lib/Format/Format.cpp
lib/Format/TokenAnnotator.cpp
unittests/Format/FormatTest.cpp

index 73258cd64b1e1819b2e2c5489b633900191430da..595e798dd3cbb54c598769f2e7b53525d9676af6 100644 (file)
@@ -33,12 +33,18 @@ struct FormatStyle {
   /// \brief The column limit.
   unsigned ColumnLimit;
 
-  /// \brief The penalty for each character outside of the column limit.
-  unsigned PenaltyExcessCharacter;
-
   /// \brief The maximum number of consecutive empty lines to keep.
   unsigned MaxEmptyLinesToKeep;
 
+  /// \brief The penalty for each line break introduced inside a comment.
+  unsigned PenaltyBreakComment;
+
+  /// \brief The penalty for each line break introduced inside a string literal.
+  unsigned PenaltyBreakString;
+
+  /// \brief The penalty for each character outside of the column limit.
+  unsigned PenaltyExcessCharacter;
+
   /// \brief Set whether & and * bind to the type as opposed to the variable.
   bool PointerBindsToType;
 
@@ -144,6 +150,8 @@ struct FormatStyle {
            IndentWidth == R.IndentWidth &&
            MaxEmptyLinesToKeep == R.MaxEmptyLinesToKeep &&
            ObjCSpaceBeforeProtocolList == R.ObjCSpaceBeforeProtocolList &&
+           PenaltyBreakString == R.PenaltyBreakString &&
+           PenaltyBreakComment == R.PenaltyBreakComment &&
            PenaltyExcessCharacter == R.PenaltyExcessCharacter &&
            PenaltyReturnTypeOnItsOwnLine == R.PenaltyReturnTypeOnItsOwnLine &&
            PointerBindsToType == R.PointerBindsToType &&
index 5e5604c597f9d3adfb7b8cecd82af4c13c9f6d15..94b4322e7e0b5b388da34cb821d2c241f9327dca 100644 (file)
@@ -112,11 +112,10 @@ BreakableToken::Split getStringSplit(StringRef Text,
 
 unsigned BreakableSingleLineToken::getLineCount() const { return 1; }
 
-unsigned
-BreakableSingleLineToken::getLineLengthAfterSplit(unsigned LineIndex,
-                                                  unsigned TailOffset) const {
+unsigned BreakableSingleLineToken::getLineLengthAfterSplit(
+    unsigned LineIndex, unsigned Offset, StringRef::size_type Length) const {
   return StartColumn + Prefix.size() + Postfix.size() +
-         encoding::getCodePointCount(Line.substr(TailOffset), Encoding);
+         encoding::getCodePointCount(Line.substr(Offset, Length), Encoding);
 }
 
 void BreakableSingleLineToken::insertBreak(unsigned LineIndex,
@@ -268,11 +267,10 @@ void BreakableBlockComment::adjustWhitespace(const FormatStyle &Style,
 
 unsigned BreakableBlockComment::getLineCount() const { return Lines.size(); }
 
-unsigned
-BreakableBlockComment::getLineLengthAfterSplit(unsigned LineIndex,
-                                               unsigned TailOffset) const {
-  return getContentStartColumn(LineIndex, TailOffset) +
-         encoding::getCodePointCount(Lines[LineIndex].substr(TailOffset),
+unsigned BreakableBlockComment::getLineLengthAfterSplit(
+    unsigned LineIndex, unsigned Offset, StringRef::size_type Length) const {
+  return getContentStartColumn(LineIndex, Offset) +
+         encoding::getCodePointCount(Lines[LineIndex].substr(Offset, Length),
                                      Encoding) +
          // The last line gets a "*/" postfix.
          (LineIndex + 1 == Lines.size() ? 2 : 0);
index 157bff4c42fffb6f4cd476d25a6184920052178c..100bb19dc5df8826229eab9f51dbfb043d398cd2 100644 (file)
@@ -33,7 +33,7 @@ struct FormatStyle;
 /// strategy into the class, instead of controlling it from the outside.
 class BreakableToken {
 public:
-  // Contains starting character index and length of split.
+  /// \brief Contains starting character index and length of split.
   typedef std::pair<StringRef::size_type, unsigned> Split;
 
   virtual ~BreakableToken() {}
@@ -41,13 +41,15 @@ public:
   /// \brief Returns the number of lines in this token in the original code.
   virtual unsigned getLineCount() const = 0;
 
-  /// \brief Returns the rest of the length of the line at \p LineIndex,
-  /// when broken at \p TailOffset.
+  /// \brief Returns the number of columns required to format the piece of line
+  /// at \p LineIndex, from byte offset \p Offset with length \p Length.
   ///
-  /// Note that previous breaks are not taken into account. \p TailOffset
-  /// is always specified from the start of the (original) line.
-  virtual unsigned getLineLengthAfterSplit(unsigned LineIndex,
-                                           unsigned TailOffset) const = 0;
+  /// Note that previous breaks are not taken into account. \p Offset is always
+  /// specified from the start of the (original) line.
+  /// \p Length can be set to StringRef::npos, which means "to the end of line".
+  virtual unsigned
+      getLineLengthAfterSplit(unsigned LineIndex, unsigned Offset,
+                              StringRef::size_type Length) const = 0;
 
   /// \brief Returns a range (offset, length) at which to break the line at
   /// \p LineIndex, if previously broken at \p TailOffset. If possible, do not
@@ -80,7 +82,8 @@ class BreakableSingleLineToken : public BreakableToken {
 public:
   virtual unsigned getLineCount() const;
   virtual unsigned getLineLengthAfterSplit(unsigned LineIndex,
-                                           unsigned TailOffset) const;
+                                           unsigned TailOffset,
+                                           StringRef::size_type Length) const;
   virtual void insertBreak(unsigned LineIndex, unsigned TailOffset, Split Split,
                            bool InPPDirective, WhitespaceManager &Whitespaces);
 
@@ -139,7 +142,8 @@ public:
 
   virtual unsigned getLineCount() const;
   virtual unsigned getLineLengthAfterSplit(unsigned LineIndex,
-                                           unsigned TailOffset) const;
+                                           unsigned TailOffset,
+                                           StringRef::size_type Length) const;
   virtual Split getSplit(unsigned LineIndex, unsigned TailOffset,
                          unsigned ColumnLimit) const;
   virtual void insertBreak(unsigned LineIndex, unsigned TailOffset, Split Split,
index 9dd5e4a0f214e25a53202696bf15189a8b22df74..f61f1fbb3fb568a7e25daf5beb466ea25788666d 100644 (file)
@@ -96,6 +96,8 @@ template <> struct MappingTraits<clang::format::FormatStyle> {
     IO.mapOptional("MaxEmptyLinesToKeep", Style.MaxEmptyLinesToKeep);
     IO.mapOptional("ObjCSpaceBeforeProtocolList",
                    Style.ObjCSpaceBeforeProtocolList);
+    IO.mapOptional("PenaltyBreakComment", Style.PenaltyBreakComment);
+    IO.mapOptional("PenaltyBreakString", Style.PenaltyBreakString);
     IO.mapOptional("PenaltyExcessCharacter", Style.PenaltyExcessCharacter);
     IO.mapOptional("PenaltyReturnTypeOnItsOwnLine",
                    Style.PenaltyReturnTypeOnItsOwnLine);
@@ -130,6 +132,8 @@ FormatStyle getLLVMStyle() {
   LLVMStyle.IndentCaseLabels = false;
   LLVMStyle.MaxEmptyLinesToKeep = 1;
   LLVMStyle.ObjCSpaceBeforeProtocolList = true;
+  LLVMStyle.PenaltyBreakComment = 45;
+  LLVMStyle.PenaltyBreakString = 1000;
   LLVMStyle.PenaltyExcessCharacter = 1000000;
   LLVMStyle.PenaltyReturnTypeOnItsOwnLine = 75;
   LLVMStyle.PointerBindsToType = false;
@@ -157,6 +161,8 @@ FormatStyle getGoogleStyle() {
   GoogleStyle.IndentCaseLabels = true;
   GoogleStyle.MaxEmptyLinesToKeep = 1;
   GoogleStyle.ObjCSpaceBeforeProtocolList = false;
+  GoogleStyle.PenaltyBreakComment = 45;
+  GoogleStyle.PenaltyBreakString = 1000;
   GoogleStyle.PenaltyExcessCharacter = 1000000;
   GoogleStyle.PenaltyReturnTypeOnItsOwnLine = 200;
   GoogleStyle.PointerBindsToType = true;
@@ -840,8 +846,8 @@ private:
                                        Whitespaces);
       }
       unsigned TailOffset = 0;
-      unsigned RemainingTokenColumns =
-          Token->getLineLengthAfterSplit(LineIndex, TailOffset);
+      unsigned RemainingTokenColumns = Token->getLineLengthAfterSplit(
+          LineIndex, TailOffset, StringRef::npos);
       while (RemainingTokenColumns > RemainingSpace) {
         BreakableToken::Split Split =
             Token->getSplit(LineIndex, TailOffset, getColumnLimit());
@@ -849,15 +855,23 @@ private:
           break;
         assert(Split.first != 0);
         unsigned NewRemainingTokenColumns = Token->getLineLengthAfterSplit(
-            LineIndex, TailOffset + Split.first + Split.second);
+            LineIndex, TailOffset + Split.first + Split.second,
+            StringRef::npos);
         assert(NewRemainingTokenColumns < RemainingTokenColumns);
         if (!DryRun) {
           Token->insertBreak(LineIndex, TailOffset, Split, Line.InPPDirective,
                              Whitespaces);
         }
+        Penalty += Current.is(tok::string_literal) ? Style.PenaltyBreakString
+                                                   : Style.PenaltyBreakComment;
+        unsigned ColumnsUsed =
+            Token->getLineLengthAfterSplit(LineIndex, TailOffset, Split.first);
+        if (ColumnsUsed > getColumnLimit()) {
+          Penalty +=
+              Style.PenaltyExcessCharacter * (ColumnsUsed - getColumnLimit());
+        }
         TailOffset += Split.first + Split.second;
         RemainingTokenColumns = NewRemainingTokenColumns;
-        Penalty += Style.PenaltyExcessCharacter;
         BreakInserted = true;
       }
       PositionAfterLastLineInToken = RemainingTokenColumns;
index 8b1382ed7c666a70fa023e1f9a25cb6154445e74..20709bbb3062451bf8c7e8ec85e68f62de3db714 100644 (file)
@@ -103,13 +103,16 @@ private:
       // '*' has to be a binary operator but determineStarAmpUsage() will
       // categorize it as an unary operator, so set the right type here.
       if (LookForDecls && CurrentToken->Next) {
-        FormatToken *Prev = CurrentToken->Previous;
-        FormatToken *Next = CurrentToken->Next;
-        if (Prev->Previous->is(tok::identifier) &&
-            Prev->isOneOf(tok::star, tok::amp, tok::ampamp) &&
-            CurrentToken->is(tok::identifier) && Next->isNot(tok::equal)) {
-          Prev->Type = TT_BinaryOperator;
-          LookForDecls = false;
+        FormatToken *Prev = CurrentToken->getPreviousNoneComment();
+        if (Prev) {
+          FormatToken *PrevPrev = Prev->getPreviousNoneComment();
+          FormatToken *Next = CurrentToken->Next;
+          if (PrevPrev && PrevPrev->is(tok::identifier) &&
+              Prev->isOneOf(tok::star, tok::amp, tok::ampamp) &&
+              CurrentToken->is(tok::identifier) && Next->isNot(tok::equal)) {
+            Prev->Type = TT_BinaryOperator;
+            LookForDecls = false;
+          }
         }
       }
 
index f5204b45768631260b5b67607fb167f0720ababb..6a2edc0f99e26232823f03e341ca91e22a30a5fc 100644 (file)
@@ -675,7 +675,7 @@ TEST_F(FormatTest, UnderstandsSingleLineComments) {
                "};");
   verifyGoogleFormat(
       "aaaaaaaaaaaaaaaaaaaaaaaaaa(\n"
-      "    aaaaaaaaaaaaaaaaaaaaaa);  // 81 cols with this comment");
+      "    aaaaaaaaaaaaaaaaaaaaaa);  // 81_cols_with_this_comment");
   EXPECT_EQ("D(a, {\n"
             "  // test\n"
             "  int a;\n"
@@ -871,37 +871,36 @@ TEST_F(FormatTest, SplitsLongCxxComments) {
             format("// A comment before a macro definition\n"
                    "#define a b",
                    getLLVMStyleWithColumns(20)));
+}
 
-  EXPECT_EQ("/* A comment before\n"
-            " * a macro\n"
-            " * definition */\n"
-            "#define a b",
-            format("/* A comment before a macro definition */\n"
-                   "#define a b",
-                   getLLVMStyleWithColumns(20)));
-
-  EXPECT_EQ("/* some comment\n"
-            "     *   a comment\n"
-            "* that we break\n"
-            " * another comment\n"
-            "* we have to break\n"
-            "* a left comment\n"
-            " */",
-            format("  /* some comment\n"
-                   "       *   a comment that we break\n"
-                   "   * another comment we have to break\n"
-                   "* a left comment\n"
-                   "   */",
-                   getLLVMStyleWithColumns(20)));
-
-  EXPECT_EQ("/*\n"
-            "\n"
-            "\n"
-            "    */\n",
-            format("  /*       \n"
-                   "      \n"
-                   "               \n"
-                   "      */\n"));
+TEST_F(FormatTest, PriorityOfCommentBreaking) {
+  EXPECT_EQ("if (xxx == yyy && // aaaaaaaaaaaa\n"
+            "                  // bbbbbbbbb\n"
+            "    zzz)\n"
+            "  q();",
+            format("if (xxx == yyy && // aaaaaaaaaaaa bbbbbbbbb\n"
+                   "    zzz) q();",
+                   getLLVMStyleWithColumns(40)));
+  EXPECT_EQ("if (xxxxxxxxxx ==\n"
+            "        yyy && // aaaaaa bbbbbbbb cccc\n"
+            "    zzz)\n"
+            "  q();",
+            format("if (xxxxxxxxxx == yyy && // aaaaaa bbbbbbbb cccc\n"
+                   "    zzz) q();",
+                   getLLVMStyleWithColumns(40)));
+  EXPECT_EQ("if (xxxxxxxxxx &&\n"
+            "        yyy || // aaaaaa bbbbbbbb cccc\n"
+            "    zzz)\n"
+            "  q();",
+            format("if (xxxxxxxxxx && yyy || // aaaaaa bbbbbbbb cccc\n"
+                   "    zzz) q();",
+                   getLLVMStyleWithColumns(40)));
+  EXPECT_EQ("fffffffff(&xxx, // aaaaaaaaaaaa\n"
+            "                // bbbbbbbbbbb\n"
+            "          zzz);",
+            format("fffffffff(&xxx, // aaaaaaaaaaaa bbbbbbbbbbb\n"
+                   " zzz);",
+                   getLLVMStyleWithColumns(40)));
 }
 
 TEST_F(FormatTest, MultiLineCommentsInDefines) {
@@ -1059,6 +1058,37 @@ TEST_F(FormatTest, SplitsLongLinesInComments) {
                    "    ;\n"
                    "}",
                    getLLVMStyleWithColumns(30)));
+
+  EXPECT_EQ("/* A comment before\n"
+            " * a macro\n"
+            " * definition */\n"
+            "#define a b",
+            format("/* A comment before a macro definition */\n"
+                   "#define a b",
+                   getLLVMStyleWithColumns(20)));
+
+  EXPECT_EQ("/* some comment\n"
+            "     *   a comment\n"
+            "* that we break\n"
+            " * another comment\n"
+            "* we have to break\n"
+            "* a left comment\n"
+            " */",
+            format("  /* some comment\n"
+                   "       *   a comment that we break\n"
+                   "   * another comment we have to break\n"
+                   "* a left comment\n"
+                   "   */",
+                   getLLVMStyleWithColumns(20)));
+
+  EXPECT_EQ("/*\n"
+            "\n"
+            "\n"
+            "    */\n",
+            format("  /*       \n"
+                   "      \n"
+                   "               \n"
+                   "      */\n"));
 }
 
 TEST_F(FormatTest, SplitsLongLinesInCommentsInPreprocessor) {
@@ -3263,6 +3293,7 @@ TEST_F(FormatTest, FormatsCasts) {
 
   // FIXME: Without type knowledge, this can still fall apart miserably.
   verifyFormat("void f() { my_int a = (my_int) * b; }");
+  verifyFormat("void f() { return P ? (my_int) * P : (my_int)0; }");
   verifyFormat("my_int a = (my_int) ~0;");
   verifyFormat("my_int a = (my_int)++ a;");
   verifyFormat("my_int a = (my_int) + 2;");