Don't remove backslashes from block comments.

author Alexander Kornienko <alexfh@google.com>

Fri, 14 Jun 2013 11:46:10 +0000 (11:46 +0000)

committer Alexander Kornienko <alexfh@google.com>

Fri, 14 Jun 2013 11:46:10 +0000 (11:46 +0000)
author Alexander Kornienko <alexfh@google.com>
Fri, 14 Jun 2013 11:46:10 +0000 (11:46 +0000)
committer Alexander Kornienko <alexfh@google.com>
Fri, 14 Jun 2013 11:46:10 +0000 (11:46 +0000)
diff --git a/lib/Format/BreakableToken.cpp b/lib/Format/BreakableToken.cpp

index d915aef2f947e1a0c2d0d148e42e85b44ee53343..48c1dee5bd7f5ffaa99066a8fd0ee9cdeca0eb0e 100644 (file)
--- a/lib/Format/BreakableToken.cpp
+++ b/lib/Format/BreakableToken.cpp
@@ -119,13 +119,11 @@ unsigned BreakableSingleLineToken::getLineLengthAfterSplit(
           encoding::getCodePointCount(Line.substr(Offset, Length), Encoding);
  }
  
-BreakableSingleLineToken::BreakableSingleLineToken(const FormatToken &Tok,
-                                                   unsigned StartColumn,
-                                                   StringRef Prefix,
-                                                   StringRef Postfix,
-                                                   encoding::Encoding Encoding)
-    : BreakableToken(Tok, Encoding), StartColumn(StartColumn), Prefix(Prefix),
-      Postfix(Postfix) {
+BreakableSingleLineToken::BreakableSingleLineToken(
+    const FormatToken &Tok, unsigned StartColumn, StringRef Prefix,
+    StringRef Postfix, bool InPPDirective, encoding::Encoding Encoding)
+    : BreakableToken(Tok, InPPDirective, Encoding), StartColumn(StartColumn),
+      Prefix(Prefix), Postfix(Postfix) {
    assert(Tok.TokenText.startswith(Prefix) && Tok.TokenText.endswith(Postfix));
    Line = Tok.TokenText.substr(
        Prefix.size(), Tok.TokenText.size() - Prefix.size() - Postfix.size());
@@ -133,8 +131,10 @@ BreakableSingleLineToken::BreakableSingleLineToken(const FormatToken &Tok,
  
  BreakableStringLiteral::BreakableStringLiteral(const FormatToken &Tok,
                                                 unsigned StartColumn,
+                                               bool InPPDirective,
                                                 encoding::Encoding Encoding)
-    : BreakableSingleLineToken(Tok, StartColumn, "\"", "\"", Encoding) {}
+    : BreakableSingleLineToken(Tok, StartColumn, "\"", "\"", InPPDirective,
+                               Encoding) {}
  
  BreakableToken::Split
  BreakableStringLiteral::getSplit(unsigned LineIndex, unsigned TailOffset,
@@ -145,7 +145,6 @@ BreakableStringLiteral::getSplit(unsigned LineIndex, unsigned TailOffset,
  
  void BreakableStringLiteral::insertBreak(unsigned LineIndex,
                                           unsigned TailOffset, Split Split,
-                                         bool InPPDirective,
                                           WhitespaceManager &Whitespaces) {
    Whitespaces.replaceWhitespaceInToken(
        Tok, Prefix.size() + TailOffset + Split.first, Split.second, Postfix,
@@ -162,10 +161,11 @@ static StringRef getLineCommentPrefix(StringRef Comment) {
  
  BreakableLineComment::BreakableLineComment(const FormatToken &Token,
                                             unsigned StartColumn,
+                                           bool InPPDirective,
                                             encoding::Encoding Encoding)
      : BreakableSingleLineToken(Token, StartColumn,
                                 getLineCommentPrefix(Token.TokenText), "",
-                               Encoding) {
+                               InPPDirective, Encoding) {
    OriginalPrefix = Prefix;
    if (Token.TokenText.size() > Prefix.size() &&
        isAlphanumeric(Token.TokenText[Prefix.size()])) {
@@ -184,7 +184,7 @@ BreakableLineComment::getSplit(unsigned LineIndex, unsigned TailOffset,
  }
  
  void BreakableLineComment::insertBreak(unsigned LineIndex, unsigned TailOffset,
-                                       Split Split, bool InPPDirective,
+                                       Split Split,
                                         WhitespaceManager &Whitespaces) {
    Whitespaces.replaceWhitespaceInToken(
        Tok, OriginalPrefix.size() + TailOffset + Split.first, Split.second,
@@ -193,7 +193,6 @@ void BreakableLineComment::insertBreak(unsigned LineIndex, unsigned TailOffset,
  
  void
  BreakableLineComment::replaceWhitespaceBefore(unsigned LineIndex,
-                                              unsigned InPPDirective,
                                                WhitespaceManager &Whitespaces) {
    if (OriginalPrefix != Prefix) {
      Whitespaces.replaceWhitespaceInToken(Tok, OriginalPrefix.size(), 0, "", "",
@@ -203,8 +202,9 @@ BreakableLineComment::replaceWhitespaceBefore(unsigned LineIndex,
  
  BreakableBlockComment::BreakableBlockComment(
      const FormatStyle &Style, const FormatToken &Token, unsigned StartColumn,
-    unsigned OriginalStartColumn, bool FirstInLine, encoding::Encoding Encoding)
-    : BreakableToken(Token, Encoding) {
+    unsigned OriginalStartColumn, bool FirstInLine, bool InPPDirective,
+    encoding::Encoding Encoding)
+    : BreakableToken(Token, InPPDirective, Encoding) {
    StringRef TokenText(Token.TokenText);
    assert(TokenText.startswith("/*") && TokenText.endswith("*/"));
    TokenText.substr(2, TokenText.size() - 4).split(Lines, "\n");
@@ -264,8 +264,18 @@ BreakableBlockComment::BreakableBlockComment(
  void BreakableBlockComment::adjustWhitespace(const FormatStyle &Style,
                                               unsigned LineIndex,
                                               int IndentDelta) {
+  // When in a preprocessor directive, the trailing backslash in a block comment
+  // is not needed, but can serve a purpose of uniformity with necessary escaped
+  // newlines outside the comment. In this case we remove it here before
+  // trimming the trailing whitespace. The backslash will be re-added later when
+  // inserting a line break.
+  size_t EndOfPreviousLine = Lines[LineIndex - 1].size();
+  if (InPPDirective && Lines[LineIndex - 1].endswith("\\"))
+    --EndOfPreviousLine;
+
    // Calculate the end of the non-whitespace text in the previous line.
-  size_t EndOfPreviousLine = Lines[LineIndex - 1].find_last_not_of(" \\\t");
+  EndOfPreviousLine =
+      Lines[LineIndex - 1].find_last_not_of(" \t", EndOfPreviousLine);
    if (EndOfPreviousLine == StringRef::npos)
      EndOfPreviousLine = 0;
    else
@@ -313,7 +323,7 @@ BreakableBlockComment::getSplit(unsigned LineIndex, unsigned TailOffset,
  }
  
  void BreakableBlockComment::insertBreak(unsigned LineIndex, unsigned TailOffset,
-                                        Split Split, bool InPPDirective,
+                                        Split Split,
                                          WhitespaceManager &Whitespaces) {
    StringRef Text = Lines[LineIndex].substr(TailOffset);
    StringRef Prefix = Decoration;
@@ -334,7 +344,6 @@ void BreakableBlockComment::insertBreak(unsigned LineIndex, unsigned TailOffset,
  
  void
  BreakableBlockComment::replaceWhitespaceBefore(unsigned LineIndex,
-                                               unsigned InPPDirective,
                                                 WhitespaceManager &Whitespaces) {
    if (LineIndex == 0)
      return;
diff --git a/lib/Format/BreakableToken.h b/lib/Format/BreakableToken.h

index 4c8755b95b50fd711ac3dbc7adb563fe4d4be939..9dab473d8a84d5956d0f8406c6d1ea249457ea74 100644 (file)
--- a/lib/Format/BreakableToken.h
+++ b/lib/Format/BreakableToken.h
@@ -59,20 +59,20 @@ public:
  
    /// \brief Emits the previously retrieved \p Split via \p Whitespaces.
    virtual void insertBreak(unsigned LineIndex, unsigned TailOffset, Split Split,
-                           bool InPPDirective,
                             WhitespaceManager &Whitespaces) = 0;
  
    /// \brief Replaces the whitespace between \p LineIndex-1 and \p LineIndex.
    virtual void replaceWhitespaceBefore(unsigned LineIndex,
-                                       unsigned InPPDirective,
                                         WhitespaceManager &Whitespaces) {}
  
  protected:
-  BreakableToken(const FormatToken &Tok, encoding::Encoding Encoding)
-      : Tok(Tok), Encoding(Encoding) {}
+  BreakableToken(const FormatToken &Tok, bool InPPDirective,
+                 encoding::Encoding Encoding)
+      : Tok(Tok), InPPDirective(InPPDirective), Encoding(Encoding) {}
  
    const FormatToken &Tok;
-  encoding::Encoding Encoding;
+  const bool InPPDirective;
+  const encoding::Encoding Encoding;
  };
  
  /// \brief Base class for single line tokens that can be broken.
@@ -88,7 +88,7 @@ public:
  protected:
    BreakableSingleLineToken(const FormatToken &Tok, unsigned StartColumn,
                             StringRef Prefix, StringRef Postfix,
-                           encoding::Encoding Encoding);
+                           bool InPPDirective, encoding::Encoding Encoding);
  
    // The column in which the token starts.
    unsigned StartColumn;
@@ -107,12 +107,11 @@ public:
    /// \p StartColumn specifies the column in which the token will start
    /// after formatting.
    BreakableStringLiteral(const FormatToken &Tok, unsigned StartColumn,
-                         encoding::Encoding Encoding);
+                         bool InPPDirective, encoding::Encoding Encoding);
  
    virtual Split getSplit(unsigned LineIndex, unsigned TailOffset,
                           unsigned ColumnLimit) const;
    virtual void insertBreak(unsigned LineIndex, unsigned TailOffset, Split Split,
-                           bool InPPDirective,
                             WhitespaceManager &Whitespaces);
  };
  
@@ -123,14 +122,13 @@ public:
    /// \p StartColumn specifies the column in which the comment will start
    /// after formatting.
    BreakableLineComment(const FormatToken &Token, unsigned StartColumn,
-                       encoding::Encoding Encoding);
+                       bool InPPDirective, encoding::Encoding Encoding);
  
    virtual Split getSplit(unsigned LineIndex, unsigned TailOffset,
                           unsigned ColumnLimit) const;
    virtual void insertBreak(unsigned LineIndex, unsigned TailOffset, Split Split,
-                           bool InPPDirective, WhitespaceManager &Whitespaces);
+                           WhitespaceManager &Whitespaces);
    virtual void replaceWhitespaceBefore(unsigned LineIndex,
-                                       unsigned InPPDirective,
                                         WhitespaceManager &Whitespaces);
  
  private:
@@ -148,7 +146,8 @@ public:
    /// If the comment starts a line after formatting, set \p FirstInLine to true.
    BreakableBlockComment(const FormatStyle &Style, const FormatToken &Token,
                          unsigned StartColumn, unsigned OriginaStartColumn,
-                        bool FirstInLine, encoding::Encoding Encoding);
+                        bool FirstInLine, bool InPPDirective,
+                        encoding::Encoding Encoding);
  
    virtual unsigned getLineCount() const;
    virtual unsigned getLineLengthAfterSplit(unsigned LineIndex,
@@ -157,9 +156,8 @@ public:
    virtual Split getSplit(unsigned LineIndex, unsigned TailOffset,
                           unsigned ColumnLimit) const;
    virtual void insertBreak(unsigned LineIndex, unsigned TailOffset, Split Split,
-                           bool InPPDirective, WhitespaceManager &Whitespaces);
+                           WhitespaceManager &Whitespaces);
    virtual void replaceWhitespaceBefore(unsigned LineIndex,
-                                       unsigned InPPDirective,
                                         WhitespaceManager &Whitespaces);
  
  private:
diff --git a/lib/Format/Format.cpp b/lib/Format/Format.cpp

index e0a05b64ecc6f64b6327a3ec56298c9f2589ff2c..92d58ba04c7bd2b23ae31ff14d7edbab70bf5132 100644 (file)
--- a/lib/Format/Format.cpp
+++ b/lib/Format/Format.cpp
@@ -804,7 +804,6 @@ private:
    /// cover the cost of the additional line breaks.
    unsigned breakProtrudingToken(const FormatToken &Current, LineState &State,
                                  bool DryRun) {
-    unsigned UnbreakableTailLength = Current.UnbreakableTailLength;
      llvm::OwningPtr<BreakableToken> Token;
      unsigned StartColumn = State.Column - Current.CodePointCount;
      unsigned OriginalStartColumn =
@@ -814,37 +813,34 @@ private:
      if (Current.is(tok::string_literal) &&
          Current.Type != TT_ImplicitStringLiteral) {
        // Only break up default narrow strings.
-      const char *LiteralData =
-          SourceMgr.getCharacterData(Current.getStartOfNonWhitespace());
-      if (!LiteralData || *LiteralData != '"')
+      if (!Current.TokenText.startswith("\""))
          return 0;
  
-      Token.reset(new BreakableStringLiteral(Current, StartColumn, Encoding));
+      Token.reset(new BreakableStringLiteral(Current, StartColumn,
+                                             Line.InPPDirective, Encoding));
      } else if (Current.Type == TT_BlockComment) {
-      BreakableBlockComment *BBC = new BreakableBlockComment(
+      Token.reset(new BreakableBlockComment(
            Style, Current, StartColumn, OriginalStartColumn, !Current.Previous,
-          Encoding);
-      Token.reset(BBC);
+          Line.InPPDirective, Encoding));
      } else if (Current.Type == TT_LineComment &&
                 (Current.Previous == NULL ||
                  Current.Previous->Type != TT_ImplicitStringLiteral)) {
-      Token.reset(new BreakableLineComment(Current, StartColumn, Encoding));
+      Token.reset(new BreakableLineComment(Current, StartColumn,
+                                           Line.InPPDirective, Encoding));
      } else {
        return 0;
      }
-    if (UnbreakableTailLength >= getColumnLimit())
+    if (Current.UnbreakableTailLength >= getColumnLimit())
        return 0;
-    unsigned RemainingSpace = getColumnLimit() - UnbreakableTailLength;
+    unsigned RemainingSpace = getColumnLimit() - Current.UnbreakableTailLength;
  
      bool BreakInserted = false;
      unsigned Penalty = 0;
      unsigned PositionAfterLastLineInToken = 0;
      for (unsigned LineIndex = 0, EndIndex = Token->getLineCount();
           LineIndex != EndIndex; ++LineIndex) {
-      if (!DryRun) {
-        Token->replaceWhitespaceBefore(LineIndex, Line.InPPDirective,
-                                       Whitespaces);
-      }
+      if (!DryRun)
+        Token->replaceWhitespaceBefore(LineIndex, Whitespaces);
        unsigned TailOffset = 0;
        unsigned RemainingTokenColumns = Token->getLineLengthAfterSplit(
            LineIndex, TailOffset, StringRef::npos);
@@ -858,10 +854,8 @@ private:
              LineIndex, TailOffset + Split.first + Split.second,
              StringRef::npos);
          assert(NewRemainingTokenColumns < RemainingTokenColumns);
-        if (!DryRun) {
-          Token->insertBreak(LineIndex, TailOffset, Split, Line.InPPDirective,
-                             Whitespaces);
-        }
+        if (!DryRun)
+          Token->insertBreak(LineIndex, TailOffset, Split, Whitespaces);
          Penalty += Current.is(tok::string_literal) ? Style.PenaltyBreakString
                                                     : Style.PenaltyBreakComment;
          unsigned ColumnsUsed =
diff --git a/unittests/Format/FormatTest.cpp b/unittests/Format/FormatTest.cpp

index c8bd945380a52c8cc0637843a187b3de7e14e92a..32f9d95d668036bfe5830f87984b7d72cfb6983f 100644 (file)
--- a/unittests/Format/FormatTest.cpp
+++ b/unittests/Format/FormatTest.cpp
@@ -1961,6 +1961,10 @@ TEST_F(FormatTest, EscapedNewlineAtStartOfToken) {
    EXPECT_EQ("template <class T> f();", format("\\\ntemplate <class T> f();"));
  }
  
+TEST_F(FormatTest, NoEscapedNewlineHandlingInBlockComments) {
+  EXPECT_EQ("/* \\  \\  \\\n*/", format("\\\n/* \\  \\  \\\n*/"));
+}
+
  TEST_F(FormatTest, CalculateSpaceOnConsecutiveLinesInMacro) {
    verifyFormat("#define A \\\n"
                 "  int v(  \\\n"
@@ -4605,6 +4609,19 @@ TEST_F(FormatTest, BreakStringLiterals) {
        format("#define A \"some text other\";", AlignLeft));
  }
  
+TEST_F(FormatTest, SkipsUnknownStringLiterals) {
+  EXPECT_EQ("u8\"unsupported literal\";",
+            format("u8\"unsupported literal\";", getLLVMStyleWithColumns(15)));
+  EXPECT_EQ("u\"unsupported literal\";",
+            format("u\"unsupported literal\";", getLLVMStyleWithColumns(15)));
+  EXPECT_EQ("U\"unsupported literal\";",
+            format("U\"unsupported literal\";", getLLVMStyleWithColumns(15)));
+  EXPECT_EQ("L\"unsupported literal\";",
+            format("L\"unsupported literal\";", getLLVMStyleWithColumns(15)));
+  EXPECT_EQ("R\"x(raw literal)x\";",
+            format("R\"x(raw literal)x\";", getLLVMStyleWithColumns(15)));
+}
+
  TEST_F(FormatTest, BreakStringLiteralsBeforeUnbreakableTokenSequence) {
    EXPECT_EQ("someFunction(\"aaabbbcccd\"\n"
              "             \"ddeeefff\");",
author	Alexander Kornienko <alexfh@google.com>
	Fri, 14 Jun 2013 11:46:10 +0000 (11:46 +0000)
committer	Alexander Kornienko <alexfh@google.com>
	Fri, 14 Jun 2013 11:46:10 +0000 (11:46 +0000)
lib/Format/BreakableToken.cpp		patch \| blob \| history
lib/Format/BreakableToken.h		patch \| blob \| history
lib/Format/Format.cpp		patch \| blob \| history
unittests/Format/FormatTest.cpp		patch \| blob \| history