From 2c2f729d10cb10324bf2e7871867f7bbc19ad358 Mon Sep 17 00:00:00 2001 From: Alexander Kornienko Date: Mon, 16 Sep 2013 20:20:49 +0000 Subject: [PATCH] When in pre-c++11 mode, treat _T("xxx") as a single string literal, repeat the _T() part around each fragment. This addresses http://llvm.org/PR17122 Reviewers: djasper Reviewed By: djasper CC: cfe-commits, klimek, rsmith Differential Revision: http://llvm-reviews.chandlerc.com/D1640 git-svn-id: https://llvm.org/svn/llvm-project/cfe/trunk@190804 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Format/BreakableToken.cpp | 33 ++++++------- lib/Format/BreakableToken.h | 1 + lib/Format/ContinuationIndenter.cpp | 51 ++++++++++++++++--- lib/Format/Format.cpp | 38 ++++++++++++++- lib/Format/WhitespaceManager.cpp | 8 ++- unittests/Format/FormatTest.cpp | 76 ++++++++++++++++++++++++----- 6 files changed, 164 insertions(+), 43 deletions(-) diff --git a/lib/Format/BreakableToken.cpp b/lib/Format/BreakableToken.cpp index 566769afea..b23819d102 100644 --- a/lib/Format/BreakableToken.cpp +++ b/lib/Format/BreakableToken.cpp @@ -82,20 +82,18 @@ static BreakableToken::Split getCommentSplit(StringRef Text, } static BreakableToken::Split getStringSplit(StringRef Text, - unsigned ContentStartColumn, + unsigned UsedColumns, unsigned ColumnLimit, unsigned TabWidth, encoding::Encoding Encoding) { // FIXME: Reduce unit test case. if (Text.empty()) return BreakableToken::Split(StringRef::npos, 0); - if (ColumnLimit <= ContentStartColumn) + if (ColumnLimit <= UsedColumns) return BreakableToken::Split(StringRef::npos, 0); - unsigned MaxSplit = - std::min(ColumnLimit - ContentStartColumn, - encoding::columnWidthWithTabs(Text, ContentStartColumn, - TabWidth, Encoding) - - 1); + unsigned MaxSplit = std::min( + ColumnLimit - UsedColumns, + encoding::columnWidthWithTabs(Text, UsedColumns, TabWidth, Encoding) - 1); StringRef::size_type SpaceOffset = 0; StringRef::size_type SlashOffset = 0; StringRef::size_type WordStartOffset = 0; @@ -107,9 +105,8 @@ static BreakableToken::Split getStringSplit(StringRef Text, Chars += Advance; } else { Advance = encoding::getCodePointNumBytes(Text[0], Encoding); - Chars += encoding::columnWidthWithTabs(Text.substr(0, Advance), - ContentStartColumn + Chars, - TabWidth, Encoding); + Chars += encoding::columnWidthWithTabs( + Text.substr(0, Advance), UsedColumns + Chars, TabWidth, Encoding); } if (Chars > MaxSplit) @@ -158,19 +155,19 @@ BreakableSingleLineToken::BreakableSingleLineToken( Prefix.size(), Tok.TokenText.size() - Prefix.size() - Postfix.size()); } -BreakableStringLiteral::BreakableStringLiteral(const FormatToken &Tok, - unsigned StartColumn, - bool InPPDirective, - encoding::Encoding Encoding, - const FormatStyle &Style) - : BreakableSingleLineToken(Tok, StartColumn, "\"", "\"", InPPDirective, +BreakableStringLiteral::BreakableStringLiteral( + const FormatToken &Tok, unsigned StartColumn, StringRef Prefix, + StringRef Postfix, bool InPPDirective, encoding::Encoding Encoding, + const FormatStyle &Style) + : BreakableSingleLineToken(Tok, StartColumn, Prefix, Postfix, InPPDirective, Encoding, Style) {} BreakableToken::Split BreakableStringLiteral::getSplit(unsigned LineIndex, unsigned TailOffset, unsigned ColumnLimit) const { - return getStringSplit(Line.substr(TailOffset), StartColumn + 2, ColumnLimit, - Style.TabWidth, Encoding); + return getStringSplit(Line.substr(TailOffset), + StartColumn + Prefix.size() + Postfix.size(), + ColumnLimit, Style.TabWidth, Encoding); } void BreakableStringLiteral::insertBreak(unsigned LineIndex, diff --git a/lib/Format/BreakableToken.h b/lib/Format/BreakableToken.h index 65b9015251..af3b09b5a5 100644 --- a/lib/Format/BreakableToken.h +++ b/lib/Format/BreakableToken.h @@ -110,6 +110,7 @@ public: /// \p StartColumn specifies the column in which the token will start /// after formatting. BreakableStringLiteral(const FormatToken &Tok, unsigned StartColumn, + StringRef Prefix, StringRef Postfix, bool InPPDirective, encoding::Encoding Encoding, const FormatStyle &Style); diff --git a/lib/Format/ContinuationIndenter.cpp b/lib/Format/ContinuationIndenter.cpp index 7066e15d5d..87c022294e 100644 --- a/lib/Format/ContinuationIndenter.cpp +++ b/lib/Format/ContinuationIndenter.cpp @@ -633,6 +633,25 @@ unsigned ContinuationIndenter::addMultilineToken(const FormatToken &Current, return 0; } +static bool getRawStringLiteralPrefixPostfix(StringRef Text, + StringRef &Prefix, + StringRef &Postfix) { + if (Text.startswith(Prefix = "R\"") || Text.startswith(Prefix = "uR\"") || + Text.startswith(Prefix = "UR\"") || Text.startswith(Prefix = "u8R\"") || + Text.startswith(Prefix = "LR\"")) { + size_t ParenPos = Text.find('('); + if (ParenPos != StringRef::npos) { + StringRef Delimiter = + Text.substr(Prefix.size(), ParenPos - Prefix.size()); + Prefix = Text.substr(0, ParenPos + 1); + Postfix = Text.substr(Text.size() - 2 - Delimiter.size()); + return Postfix.front() == ')' && Postfix.back() == '"' && + Postfix.substr(1).startswith(Delimiter); + } + } + return false; +} + unsigned ContinuationIndenter::breakProtrudingToken(const FormatToken &Current, LineState &State, bool DryRun) { @@ -641,24 +660,42 @@ unsigned ContinuationIndenter::breakProtrudingToken(const FormatToken &Current, if (Current.Type != TT_BlockComment && Current.IsMultiline) return addMultilineToken(Current, State); - if (!Current.isOneOf(tok::string_literal, tok::comment)) + if (!Current.isOneOf(tok::string_literal, tok::wide_string_literal, + tok::utf8_string_literal, tok::utf16_string_literal, + tok::utf32_string_literal, tok::comment)) return 0; llvm::OwningPtr Token; unsigned StartColumn = State.Column - Current.ColumnWidth; - if (Current.is(tok::string_literal) && + if (Current.isOneOf(tok::string_literal, tok::wide_string_literal, + tok::utf8_string_literal, tok::utf16_string_literal, + tok::utf32_string_literal) && Current.Type != TT_ImplicitStringLiteral) { - // Only break up default narrow strings. - if (!Current.TokenText.startswith("\"")) - return 0; // Exempts unterminated string literals from line breaking. The user will // likely want to terminate the string before any line breaking is done. if (Current.IsUnterminatedLiteral) return 0; - Token.reset(new BreakableStringLiteral( - Current, StartColumn, State.Line->InPPDirective, Encoding, Style)); + StringRef Text = Current.TokenText; + StringRef Prefix; + StringRef Postfix; + // FIXME: Handle whitespace between '_T', '(', '"..."', and ')'. + // FIXME: Store Prefix and Suffix (or PrefixLength and SuffixLength to + // reduce the overhead) for each FormatToken, which is a string, so that we + // don't run multiple checks here on the hot path. + if ((Text.endswith(Postfix = "\"") && + (Text.startswith(Prefix = "\"") || Text.startswith(Prefix = "u\"") || + Text.startswith(Prefix = "U\"") || Text.startswith(Prefix = "u8\"") || + Text.startswith(Prefix = "L\""))) || + (Text.startswith(Prefix = "_T(\"") && Text.endswith(Postfix = "\")")) || + getRawStringLiteralPrefixPostfix(Text, Prefix, Postfix)) { + Token.reset(new BreakableStringLiteral(Current, StartColumn, Prefix, + Postfix, State.Line->InPPDirective, + Encoding, Style)); + } else { + return 0; + } } else if (Current.Type == TT_BlockComment && Current.isTrailingComment()) { Token.reset(new BreakableBlockComment( Current, StartColumn, Current.OriginalColumn, !Current.Previous, diff --git a/lib/Format/Format.cpp b/lib/Format/Format.cpp index 034f6e6ee8..5271a62e8f 100644 --- a/lib/Format/Format.cpp +++ b/lib/Format/Format.cpp @@ -591,6 +591,7 @@ public: assert(Tokens.empty()); do { Tokens.push_back(getNextToken()); + maybeJoinPreviousTokens(); } while (Tokens.back()->Tok.isNot(tok::eof)); return Tokens; } @@ -598,6 +599,40 @@ public: IdentifierTable &getIdentTable() { return IdentTable; } private: + void maybeJoinPreviousTokens() { + if (Tokens.size() < 4) + return; + FormatToken *Last = Tokens.back(); + if (!Last->is(tok::r_paren)) + return; + + FormatToken *String = Tokens[Tokens.size() - 2]; + if (!String->is(tok::string_literal) || String->IsMultiline) + return; + + if (!Tokens[Tokens.size() - 3]->is(tok::l_paren)) + return; + + FormatToken *Macro = Tokens[Tokens.size() - 4]; + if (Macro->TokenText != "_T") + return; + + const char *Start = Macro->TokenText.data(); + const char *End = Last->TokenText.data() + Last->TokenText.size(); + String->TokenText = StringRef(Start, End - Start); + String->IsFirst = Macro->IsFirst; + String->LastNewlineOffset = Macro->LastNewlineOffset; + String->WhitespaceRange = Macro->WhitespaceRange; + String->OriginalColumn = Macro->OriginalColumn; + String->ColumnWidth = encoding::columnWidthWithTabs( + String->TokenText, String->OriginalColumn, Style.TabWidth, Encoding); + + Tokens.pop_back(); + Tokens.pop_back(); + Tokens.pop_back(); + Tokens.back() = String; + } + FormatToken *getNextToken() { if (GreaterStashed) { // Create a synthesized second '>' token. @@ -1135,7 +1170,8 @@ private: CharSourceRange LineRange = CharSourceRange::getCharRange( First->WhitespaceRange.getBegin().getLocWithOffset( First->LastNewlineOffset), - Last->Tok.getLocation().getLocWithOffset(Last->TokenText.size() - 1)); + Last->getStartOfNonWhitespace().getLocWithOffset( + Last->TokenText.size() - 1)); return touchesRanges(LineRange); } diff --git a/lib/Format/WhitespaceManager.cpp b/lib/Format/WhitespaceManager.cpp index 4138442c0c..c0a3f5d60b 100644 --- a/lib/Format/WhitespaceManager.cpp +++ b/lib/Format/WhitespaceManager.cpp @@ -49,11 +49,9 @@ void WhitespaceManager::replaceWhitespace(const FormatToken &Tok, void WhitespaceManager::addUntouchableToken(const FormatToken &Tok, bool InPPDirective) { - Changes.push_back( - Change(false, Tok.WhitespaceRange, /*Spaces=*/0, - SourceMgr.getSpellingColumnNumber(Tok.Tok.getLocation()) - 1, - Tok.NewlinesBefore, "", "", Tok.Tok.getKind(), - InPPDirective && !Tok.IsFirst)); + Changes.push_back(Change(false, Tok.WhitespaceRange, /*Spaces=*/0, + Tok.OriginalColumn, Tok.NewlinesBefore, "", "", + Tok.Tok.getKind(), InPPDirective && !Tok.IsFirst)); } void WhitespaceManager::replaceWhitespaceInToken( diff --git a/unittests/Format/FormatTest.cpp b/unittests/Format/FormatTest.cpp index 020f448033..42293802f4 100644 --- a/unittests/Format/FormatTest.cpp +++ b/unittests/Format/FormatTest.cpp @@ -5468,6 +5468,66 @@ TEST_F(FormatTest, BreakStringLiterals) { format("#define A \"some text other\";", AlignLeft)); } +TEST_F(FormatTest, BreaksWideStringLiterals) { + EXPECT_EQ( + "u8\"utf8 string \"\n" + "u8\"literal\";", + format("u8\"utf8 string literal\";", getGoogleStyleWithColumns(16))); + EXPECT_EQ( + "u\"utf16 string \"\n" + "u\"literal\";", + format("u\"utf16 string literal\";", getGoogleStyleWithColumns(16))); + EXPECT_EQ( + "U\"utf32 string \"\n" + "U\"literal\";", + format("U\"utf32 string literal\";", getGoogleStyleWithColumns(16))); + EXPECT_EQ("L\"wide string \"\n" + "L\"literal\";", + format("L\"wide string literal\";", getGoogleStyleWithColumns(16))); +} + +TEST_F(FormatTest, BreaksRawStringLiterals) { + EXPECT_EQ("R\"x(raw )x\"\n" + "R\"x(literal)x\";", + format("R\"x(raw literal)x\";", getGoogleStyleWithColumns(15))); + EXPECT_EQ("uR\"x(raw )x\"\n" + "uR\"x(literal)x\";", + format("uR\"x(raw literal)x\";", getGoogleStyleWithColumns(16))); + EXPECT_EQ("u8R\"x(raw )x\"\n" + "u8R\"x(literal)x\";", + format("u8R\"x(raw literal)x\";", getGoogleStyleWithColumns(17))); + EXPECT_EQ("LR\"x(raw )x\"\n" + "LR\"x(literal)x\";", + format("LR\"x(raw literal)x\";", getGoogleStyleWithColumns(16))); + EXPECT_EQ("UR\"x(raw )x\"\n" + "UR\"x(literal)x\";", + format("UR\"x(raw literal)x\";", getGoogleStyleWithColumns(16))); +} + +TEST_F(FormatTest, BreaksStringLiteralsWithin_TMacro) { + FormatStyle Style = getLLVMStyleWithColumns(20); + EXPECT_EQ( + "_T(\"aaaaaaaaaaaaaa\")\n" + "_T(\"aaaaaaaaaaaaaa\")\n" + "_T(\"aaaaaaaaaaaa\")", + format(" _T(\"aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa\")", Style)); + EXPECT_EQ("f(x, _T(\"aaaaaaaaa\")\n" + " _T(\"aaaaaa\"),\n" + " z);", + format("f(x, _T(\"aaaaaaaaaaaaaaa\"), z);", Style)); + + // FIXME: Handle embedded spaces in one iteration. + // EXPECT_EQ("_T(\"aaaaaaaaaaaaa\")\n" + // "_T(\"aaaaaaaaaaaaa\")\n" + // "_T(\"aaaaaaaaaaaaa\")\n" + // "_T(\"a\")", + // format(" _T ( \"aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa\" )", + // getLLVMStyleWithColumns(20))); + EXPECT_EQ( + "_T ( \"aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa\" )", + format(" _T ( \"aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa\" )", Style)); +} + TEST_F(FormatTest, DontSplitStringLiteralsWithEscapedNewlines) { EXPECT_EQ( "aaaaaaaaaaa = \"aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa\\\n" @@ -5519,17 +5579,6 @@ TEST_F(FormatTest, CountsCharactersInMultilineRawStringLiterals) { } TEST_F(FormatTest, SkipsUnknownStringLiterals) { - EXPECT_EQ( - "u8\"unsupported literal\";", - format("u8\"unsupported literal\";", getGoogleStyleWithColumns(15))); - EXPECT_EQ("u\"unsupported literal\";", - format("u\"unsupported literal\";", getGoogleStyleWithColumns(15))); - EXPECT_EQ("U\"unsupported literal\";", - format("U\"unsupported literal\";", getGoogleStyleWithColumns(15))); - EXPECT_EQ("L\"unsupported literal\";", - format("L\"unsupported literal\";", getGoogleStyleWithColumns(15))); - EXPECT_EQ("R\"x(raw literal)x\";", - format("R\"x(raw literal)x\";", getGoogleStyleWithColumns(15))); verifyFormat("string a = \"unterminated;"); EXPECT_EQ("function(\"unterminated,\n" " OtherParameter);", @@ -5626,7 +5675,10 @@ TEST_F(FormatTest, DoNotBreakStringLiteralsInEscapeSequence) { "\"00000000\"\n" "\"1\"", format("\"test\\000000000001\"", getLLVMStyleWithColumns(10))); - EXPECT_EQ("R\"(\\x\\x00)\"\n", + // FIXME: We probably don't need to care about escape sequences in raw + // literals. + EXPECT_EQ("R\"(\\x)\"\n" + "R\"(\\x00)\"\n", format("R\"(\\x\\x00)\"\n", getGoogleStyleWithColumns(7))); } -- 2.40.0