From: Alexander Kornienko Date: Tue, 11 Apr 2017 09:55:00 +0000 (+0000) Subject: [clang-format] Handle NSString literals by merging tokens. X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=e0be7bc05e7193f8f8f9a81d3ee1bec4d6b6ac45;p=clang [clang-format] Handle NSString literals by merging tokens. Summary: This fixes a few outstanding bugs: * incorrect breaking of NSString literals containing double-width characters; * inconsistent formatting of ObjC dictionary literals containing NSString literals; * AlwaysBreakBeforeMultilineStrings ignoring implicitly-concatenated NSString literals. Reviewers: djasper Reviewed By: djasper Subscribers: klimek, cfe-commits Differential Revision: https://reviews.llvm.org/D31706 git-svn-id: https://llvm.org/svn/llvm-project/cfe/trunk@299927 91177308-0d34-0410-b5e6-96231b3b80d8 --- diff --git a/lib/Format/BreakableToken.cpp b/lib/Format/BreakableToken.cpp index b42e4aee50..c97486e4e4 100644 --- a/lib/Format/BreakableToken.cpp +++ b/lib/Format/BreakableToken.cpp @@ -186,7 +186,7 @@ BreakableSingleLineToken::BreakableSingleLineToken( const FormatStyle &Style) : BreakableToken(Tok, InPPDirective, Encoding, Style), StartColumn(StartColumn), Prefix(Prefix), Postfix(Postfix) { - assert(Tok.TokenText.endswith(Postfix)); + assert(Tok.TokenText.startswith(Prefix) && Tok.TokenText.endswith(Postfix)); Line = Tok.TokenText.substr( Prefix.size(), Tok.TokenText.size() - Prefix.size() - Postfix.size()); } @@ -210,16 +210,9 @@ BreakableStringLiteral::getSplit(unsigned LineIndex, unsigned TailOffset, void BreakableStringLiteral::insertBreak(unsigned LineIndex, unsigned TailOffset, Split Split, WhitespaceManager &Whitespaces) { - unsigned LeadingSpaces = StartColumn; - // The '@' of an ObjC string literal (@"Test") does not become part of the - // string token. - // FIXME: It might be a cleaner solution to merge the tokens as a - // precomputation step. - if (Prefix.startswith("@")) - --LeadingSpaces; Whitespaces.replaceWhitespaceInToken( Tok, Prefix.size() + TailOffset + Split.first, Split.second, Postfix, - Prefix, InPPDirective, 1, LeadingSpaces); + Prefix, InPPDirective, 1, StartColumn); } BreakableComment::BreakableComment(const FormatToken &Token, diff --git a/lib/Format/ContinuationIndenter.cpp b/lib/Format/ContinuationIndenter.cpp index c3f386bd67..73ae10a29f 100644 --- a/lib/Format/ContinuationIndenter.cpp +++ b/lib/Format/ContinuationIndenter.cpp @@ -679,11 +679,11 @@ unsigned ContinuationIndenter::getNewLineColumn(const LineState &State) { if (Current.is(tok::identifier) && Current.Next && Current.Next->is(TT_DictLiteral)) return State.Stack.back().Indent; - if (NextNonComment->isStringLiteral() && State.StartOfStringLiteral != 0) - return State.StartOfStringLiteral; if (NextNonComment->is(TT_ObjCStringLiteral) && State.StartOfStringLiteral != 0) return State.StartOfStringLiteral - 1; + if (NextNonComment->isStringLiteral() && State.StartOfStringLiteral != 0) + return State.StartOfStringLiteral; if (NextNonComment->is(tok::lessless) && State.Stack.back().FirstLessLess != 0) return State.Stack.back().FirstLessLess; @@ -864,10 +864,10 @@ unsigned ContinuationIndenter::moveStateToNextToken(LineState &State, moveStatePastScopeOpener(State, Newline); moveStatePastFakeRParens(State); - if (Current.isStringLiteral() && State.StartOfStringLiteral == 0) - State.StartOfStringLiteral = State.Column; if (Current.is(TT_ObjCStringLiteral) && State.StartOfStringLiteral == 0) State.StartOfStringLiteral = State.Column + 1; + else if (Current.isStringLiteral() && State.StartOfStringLiteral == 0) + State.StartOfStringLiteral = State.Column; else if (!Current.isOneOf(tok::comment, tok::identifier, tok::hash) && !Current.isStringLiteral()) State.StartOfStringLiteral = 0; @@ -1175,18 +1175,12 @@ unsigned ContinuationIndenter::breakProtrudingToken(const FormatToken &Current, StringRef Text = Current.TokenText; StringRef Prefix; StringRef Postfix; - bool IsNSStringLiteral = false; // FIXME: Handle whitespace between '_T', '(', '"..."', and ')'. // FIXME: Store Prefix and Suffix (or PrefixLength and SuffixLength to // reduce the overhead) for each FormatToken, which is a string, so that we // don't run multiple checks here on the hot path. - if (Text.startswith("\"") && Current.Previous && - Current.Previous->is(tok::at)) { - IsNSStringLiteral = true; - Prefix = "@\""; - } if ((Text.endswith(Postfix = "\"") && - (IsNSStringLiteral || Text.startswith(Prefix = "\"") || + (Text.startswith(Prefix = "@\"") || Text.startswith(Prefix = "\"") || Text.startswith(Prefix = "u\"") || Text.startswith(Prefix = "U\"") || Text.startswith(Prefix = "u8\"") || Text.startswith(Prefix = "L\""))) || diff --git a/lib/Format/FormatTokenLexer.cpp b/lib/Format/FormatTokenLexer.cpp index 91c9af005a..9415dbe9ab 100644 --- a/lib/Format/FormatTokenLexer.cpp +++ b/lib/Format/FormatTokenLexer.cpp @@ -64,6 +64,8 @@ void FormatTokenLexer::tryMergePreviousTokens() { return; if (tryMergeLessLess()) return; + if (tryMergeNSStringLiteral()) + return; if (Style.Language == FormatStyle::LK_JavaScript) { static const tok::TokenKind JSIdentity[] = {tok::equalequal, tok::equal}; @@ -84,6 +86,22 @@ void FormatTokenLexer::tryMergePreviousTokens() { } } +bool FormatTokenLexer::tryMergeNSStringLiteral() { + if (Tokens.size() < 2) + return false; + auto &At = *(Tokens.end() - 2); + auto &String = *(Tokens.end() - 1); + if (!At->is(tok::at) || !String->is(tok::string_literal)) + return false; + At->Tok.setKind(tok::string_literal); + At->TokenText = StringRef(At->TokenText.begin(), + String->TokenText.end() - At->TokenText.begin()); + At->ColumnWidth += String->ColumnWidth; + At->Type = TT_ObjCStringLiteral; + Tokens.erase(Tokens.end() - 1); + return true; +} + bool FormatTokenLexer::tryMergeLessLess() { // Merge X,less,less,Y into X,lessless,Y unless X or Y is less. if (Tokens.size() < 3) diff --git a/lib/Format/FormatTokenLexer.h b/lib/Format/FormatTokenLexer.h index c47b0e725d..bf10f09cd1 100644 --- a/lib/Format/FormatTokenLexer.h +++ b/lib/Format/FormatTokenLexer.h @@ -47,6 +47,7 @@ private: void tryMergePreviousTokens(); bool tryMergeLessLess(); + bool tryMergeNSStringLiteral(); bool tryMergeTokens(ArrayRef Kinds, TokenType NewType); diff --git a/lib/Format/TokenAnnotator.cpp b/lib/Format/TokenAnnotator.cpp index a63b345cb9..004800fc2a 100644 --- a/lib/Format/TokenAnnotator.cpp +++ b/lib/Format/TokenAnnotator.cpp @@ -907,7 +907,7 @@ private: TT_FunctionLBrace, TT_ImplicitStringLiteral, TT_InlineASMBrace, TT_JsFatArrow, TT_LambdaArrow, TT_OverloadedOperator, TT_RegexLiteral, - TT_TemplateString)) + TT_TemplateString, TT_ObjCStringLiteral)) CurrentToken->Type = TT_Unknown; CurrentToken->Role.reset(); CurrentToken->MatchingParen = nullptr; @@ -1120,21 +1120,17 @@ private: } } } else if (Current.is(tok::at) && Current.Next) { - if (Current.Next->isStringLiteral()) { - Current.Type = TT_ObjCStringLiteral; - } else { - switch (Current.Next->Tok.getObjCKeywordID()) { - case tok::objc_interface: - case tok::objc_implementation: - case tok::objc_protocol: - Current.Type = TT_ObjCDecl; - break; - case tok::objc_property: - Current.Type = TT_ObjCProperty; - break; - default: - break; - } + switch (Current.Next->Tok.getObjCKeywordID()) { + case tok::objc_interface: + case tok::objc_implementation: + case tok::objc_protocol: + Current.Type = TT_ObjCDecl; + break; + case tok::objc_property: + Current.Type = TT_ObjCProperty; + break; + default: + break; } } else if (Current.is(tok::period)) { FormatToken *PreviousNoComment = Current.getPreviousNonComment(); @@ -2457,8 +2453,7 @@ bool TokenAnnotator::mustBreakBefore(const AnnotatedLine &Line, } else if (Style.Language == FormatStyle::LK_Cpp || Style.Language == FormatStyle::LK_ObjC || Style.Language == FormatStyle::LK_Proto) { - if (Left.isStringLiteral() && - (Right.isStringLiteral() || Right.is(TT_ObjCStringLiteral))) + if (Left.isStringLiteral() && Right.isStringLiteral()) return true; } diff --git a/unittests/Format/FormatTest.cpp b/unittests/Format/FormatTest.cpp index a5b3d068db..e6ba2230ac 100644 --- a/unittests/Format/FormatTest.cpp +++ b/unittests/Format/FormatTest.cpp @@ -4090,9 +4090,9 @@ TEST_F(FormatTest, AlwaysBreakBeforeMultilineStrings) { "c\";", Break)); - // Exempt ObjC strings for now. - EXPECT_EQ("NSString *const kString = @\"aaaa\"\n" - " @\"bbbb\";", + EXPECT_EQ("NSString *const kString =\n" + " @\"aaaa\"\n" + " @\"bbbb\";", format("NSString *const kString = @\"aaaa\"\n" "@\"bbbb\";", Break)); @@ -6460,6 +6460,7 @@ TEST_F(FormatTest, BreaksWideAndNSStringLiterals) { EXPECT_EQ("@\"NSString \"\n" "@\"literal\";", format("@\"NSString literal\";", getGoogleStyleWithColumns(19))); + verifyFormat(R"(NSString *s = @"那那那那";)", getLLVMStyleWithColumns(26)); // This input makes clang-format try to split the incomplete unicode escape // sequence, which used to lead to a crasher. @@ -8301,7 +8302,14 @@ TEST_F(FormatTest, AllmanBraceBreaking) { // .. or dict literals. verifyFormat("void f()\n" "{\n" - " [object someMethod:@{ @\"a\" : @\"b\" }];\n" + " // ...\n" + " [object someMethod:@{@\"a\" : @\"b\"}];\n" + "}", + AllmanBraceStyle); + verifyFormat("void f()\n" + "{\n" + " // ...\n" + " [object someMethod:@{a : @\"b\"}];\n" "}", AllmanBraceStyle); verifyFormat("int f()\n"