From 10c26b2e974f97c75fc3aaa302ca750f422cbce1 Mon Sep 17 00:00:00 2001 From: Alexander Kornienko Date: Tue, 16 Jul 2013 21:06:13 +0000 Subject: [PATCH] Don't break line comments with escaped newlines. Summary: These can appear when comments contain command lines with quoted line breaks. As the text (including escaped newlines and '//' from consecutive lines) is a single line comment, we used to break it even when it didn't exceed column limit. This is a temporary solution, in the future we may want to support this case completely - at least adjust leading whitespace when changing indentation of the first line. Reviewers: djasper Reviewed By: djasper CC: cfe-commits, klimek Differential Revision: http://llvm-reviews.chandlerc.com/D1146 git-svn-id: https://llvm.org/svn/llvm-project/cfe/trunk@186456 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Format/Format.cpp | 22 ++++++++++++++ lib/Format/TokenAnnotator.cpp | 5 ++-- unittests/Format/FormatTest.cpp | 51 +++++++++++++++++++++++++++++++++ 3 files changed, 76 insertions(+), 2 deletions(-) diff --git a/lib/Format/Format.cpp b/lib/Format/Format.cpp index c6927ecbe2..11ab58c18c 100644 --- a/lib/Format/Format.cpp +++ b/lib/Format/Format.cpp @@ -905,6 +905,11 @@ private: // Only break up default narrow strings. if (!Current.TokenText.startswith("\"")) return 0; + // Don't break string literals with escaped newlines. As clang-format must + // not change the string's content, it is unlikely that we'll end up with + // a better format. + if (Current.TokenText.find("\\\n") != StringRef::npos) + return 0; // Exempts unterminated string literals from line breaking. The user will // likely want to terminate the string before any line breaking is done. if (Current.IsUnterminatedLiteral) @@ -919,6 +924,23 @@ private: } else if (Current.Type == TT_LineComment && (Current.Previous == NULL || Current.Previous->Type != TT_ImplicitStringLiteral)) { + // Don't break line comments with escaped newlines. These look like + // separate line comments, but in fact contain a single line comment with + // multiple lines including leading whitespace and the '//' markers. + // + // FIXME: If we want to handle them correctly, we'll need to adjust + // leading whitespace in consecutive lines when changing indentation of + // the first line similar to what we do with block comments. + StringRef::size_type EscapedNewlinePos = Current.TokenText.find("\\\n"); + if (EscapedNewlinePos != StringRef::npos) { + State.Column = + StartColumn + + encoding::getCodePointCount( + Current.TokenText.substr(0, EscapedNewlinePos), Encoding) + + 1; + return 0; + } + Token.reset(new BreakableLineComment(Current, StartColumn, Line.InPPDirective, Encoding)); } else { diff --git a/lib/Format/TokenAnnotator.cpp b/lib/Format/TokenAnnotator.cpp index 6f6f468c2a..67a0fa892d 100644 --- a/lib/Format/TokenAnnotator.cpp +++ b/lib/Format/TokenAnnotator.cpp @@ -967,8 +967,9 @@ void TokenAnnotator::calculateFormattingInformation(AnnotatedLine &Line) { Current->is(tok::string_literal) && Current->Previous->isNot(tok::lessless) && Current->Previous->Type != TT_InlineASMColon && - Current->getNextNonComment() && - Current->getNextNonComment()->is(tok::string_literal)) { + ((Current->getNextNonComment() && + Current->getNextNonComment()->is(tok::string_literal)) || + (Current->TokenText.find("\\\n") != StringRef::npos))) { Current->MustBreakBefore = true; } Current->CanBreakBefore = diff --git a/unittests/Format/FormatTest.cpp b/unittests/Format/FormatTest.cpp index 3d5c21a2f7..433d0ec1d7 100644 --- a/unittests/Format/FormatTest.cpp +++ b/unittests/Format/FormatTest.cpp @@ -921,6 +921,32 @@ TEST_F(FormatTest, SplitsLongCxxComments) { getLLVMStyleWithColumns(20))); } +TEST_F(FormatTest, DontSplitLineCommentsWithEscapedNewlines) { + EXPECT_EQ("// aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa\\\n" + "// aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa\\\n" + "// aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa", + format("// aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa\\\n" + "// aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa\\\n" + "// aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa")); + EXPECT_EQ("int a; // AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA\\\n" + " // AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA\\\n" + " // AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA", + format("int a; // AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA\\\n" + " // AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA\\\n" + " // AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA", + getLLVMStyleWithColumns(50))); + // FIXME: One day we might want to implement adjustment of leading whitespace + // of the consecutive lines in this kind of comment: + EXPECT_EQ("int\n" + "a; // AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA\\\n" + " // AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA\\\n" + " // AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA", + format("int a; // AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA\\\n" + " // AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA\\\n" + " // AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA", + getLLVMStyleWithColumns(49))); +} + TEST_F(FormatTest, PriorityOfCommentBreaking) { EXPECT_EQ("if (xxx == yyy && // aaaaaaaaaaaa\n" " // bbbbbbbbb\n" @@ -2969,6 +2995,21 @@ TEST_F(FormatTest, AlwaysBreakBeforeMultilineStrings) { " \"bbbb\"\n" " \"cccc\");", format("aaaa(qqq, \"bbbb\" \"cccc\");", Break)); + EXPECT_EQ("x = \"a\\\n" + "b\\\n" + "c\";", + format("x = \"a\\\n" + "b\\\n" + "c\";", + NoBreak)); + EXPECT_EQ("x =\n" + " \"a\\\n" + "b\\\n" + "c\";", + format("x = \"a\\\n" + "b\\\n" + "c\";", + Break)); } TEST_F(FormatTest, AlignsPipes) { @@ -4992,6 +5033,16 @@ TEST_F(FormatTest, BreakStringLiterals) { format("#define A \"some text other\";", AlignLeft)); } +TEST_F(FormatTest, DontSplitStringLiteralsWithEscapedNewlines) { + EXPECT_EQ("aaaaaaaaaaa =\n" + " \"aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa\\\n" + " aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa\\\n" + " aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa\";", + format("aaaaaaaaaaa = \"aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa\\\n" + " aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa\\\n" + " aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa\";")); +} + TEST_F(FormatTest, SkipsUnknownStringLiterals) { EXPECT_EQ( "u8\"unsupported literal\";", -- 2.50.1