From 561211d35b5a2825fba6d0c017044f6896b204fd Mon Sep 17 00:00:00 2001 From: Daniel Jasper Date: Tue, 16 Jul 2013 20:28:33 +0000 Subject: [PATCH] clang-format: Improve handling of unterminated string literals. Before, clang-format would simply eat these as they were recognized as whitespace. With this patch, they are mostly left alone. git-svn-id: https://llvm.org/svn/llvm-project/cfe/trunk@186454 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Format/Format.cpp | 52 ++++++++++++++++++++------------- lib/Format/FormatToken.h | 14 +++++---- lib/Format/TokenAnnotator.cpp | 7 ++--- unittests/Format/FormatTest.cpp | 5 ++++ 4 files changed, 49 insertions(+), 29 deletions(-) diff --git a/lib/Format/Format.cpp b/lib/Format/Format.cpp index 27dd162d66..c6927ecbe2 100644 --- a/lib/Format/Format.cpp +++ b/lib/Format/Format.cpp @@ -905,6 +905,10 @@ private: // Only break up default narrow strings. if (!Current.TokenText.startswith("\"")) return 0; + // Exempts unterminated string literals from line breaking. The user will + // likely want to terminate the string before any line breaking is done. + if (Current.IsUnterminatedLiteral) + return 0; Token.reset(new BreakableStringLiteral(Current, StartColumn, Line.InPPDirective, Encoding)); @@ -1259,8 +1263,7 @@ private: } FormatTok = new (Allocator.Allocate()) FormatToken; - Lex.LexFromRawLexer(FormatTok->Tok); - StringRef Text = rawTokenText(FormatTok->Tok); + readRawToken(*FormatTok); SourceLocation WhitespaceStart = FormatTok->Tok.getLocation().getLocWithOffset(-TrailingWhitespace); if (SourceMgr.getFileOffset(WhitespaceStart) == 0) @@ -1269,16 +1272,16 @@ private: // Consume and record whitespace until we find a significant token. 
unsigned WhitespaceLength = TrailingWhitespace; while (FormatTok->Tok.is(tok::unknown)) { - unsigned Newlines = Text.count('\n'); + unsigned Newlines = FormatTok->TokenText.count('\n'); if (Newlines > 0) - FormatTok->LastNewlineOffset = WhitespaceLength + Text.rfind('\n') + 1; + FormatTok->LastNewlineOffset = + WhitespaceLength + FormatTok->TokenText.rfind('\n') + 1; FormatTok->NewlinesBefore += Newlines; - unsigned EscapedNewlines = Text.count("\\\n"); + unsigned EscapedNewlines = FormatTok->TokenText.count("\\\n"); FormatTok->HasUnescapedNewline |= EscapedNewlines != Newlines; WhitespaceLength += FormatTok->Tok.getLength(); - Lex.LexFromRawLexer(FormatTok->Tok); - Text = rawTokenText(FormatTok->Tok); + readRawToken(*FormatTok); } // In case the token starts with escaped newlines, we want to @@ -1287,30 +1290,31 @@ private: // FIXME: What do we want to do with other escaped spaces, and escaped // spaces or newlines in the middle of tokens? // FIXME: Add a more explicit test. - while (Text.size() > 1 && Text[0] == '\\' && Text[1] == '\n') { + while (FormatTok->TokenText.size() > 1 && FormatTok->TokenText[0] == '\\' && + FormatTok->TokenText[1] == '\n') { // FIXME: ++FormatTok->NewlinesBefore is missing... 
WhitespaceLength += 2; - Text = Text.substr(2); + FormatTok->TokenText = FormatTok->TokenText.substr(2); } TrailingWhitespace = 0; if (FormatTok->Tok.is(tok::comment)) { - StringRef UntrimmedText = Text; - Text = Text.rtrim(); - TrailingWhitespace = UntrimmedText.size() - Text.size(); + StringRef UntrimmedText = FormatTok->TokenText; + FormatTok->TokenText = FormatTok->TokenText.rtrim(); + TrailingWhitespace = UntrimmedText.size() - FormatTok->TokenText.size(); } else if (FormatTok->Tok.is(tok::raw_identifier)) { - IdentifierInfo &Info = IdentTable.get(Text); + IdentifierInfo &Info = IdentTable.get(FormatTok->TokenText); FormatTok->Tok.setIdentifierInfo(&Info); FormatTok->Tok.setKind(Info.getTokenID()); } else if (FormatTok->Tok.is(tok::greatergreater)) { FormatTok->Tok.setKind(tok::greater); - Text = Text.substr(0, 1); + FormatTok->TokenText = FormatTok->TokenText.substr(0, 1); GreaterStashed = true; } // Now FormatTok is the next non-whitespace token. - FormatTok->TokenText = Text; - FormatTok->CodePointCount = encoding::getCodePointCount(Text, Encoding); + FormatTok->CodePointCount = + encoding::getCodePointCount(FormatTok->TokenText, Encoding); FormatTok->WhitespaceRange = SourceRange( WhitespaceStart, WhitespaceStart.getLocWithOffset(WhitespaceLength)); @@ -1327,10 +1331,18 @@ private: llvm::SpecificBumpPtrAllocator<FormatToken> Allocator; SmallVector<FormatToken *, 16> Tokens; - /// Returns the text of \c FormatTok. - StringRef rawTokenText(Token &Tok) { - return StringRef(SourceMgr.getCharacterData(Tok.getLocation()), - Tok.getLength()); + void readRawToken(FormatToken &Tok) { + Lex.LexFromRawLexer(Tok.Tok); + Tok.TokenText = StringRef(SourceMgr.getCharacterData(Tok.Tok.getLocation()), + Tok.Tok.getLength()); + + // For formatting, treat unterminated string literals like normal string + // literals. 
+ if (Tok.is(tok::unknown) && !Tok.TokenText.empty() && + Tok.TokenText[0] == '"') { + Tok.Tok.setKind(tok::string_literal); + Tok.IsUnterminatedLiteral = true; + } } }; diff --git a/lib/Format/FormatToken.h b/lib/Format/FormatToken.h index 03f097000f..3841e49ef4 100644 --- a/lib/Format/FormatToken.h +++ b/lib/Format/FormatToken.h @@ -77,11 +77,12 @@ struct FormatToken { FormatToken() : NewlinesBefore(0), HasUnescapedNewline(false), LastNewlineOffset(0), CodePointCount(0), IsFirst(false), MustBreakBefore(false), - BlockKind(BK_Unknown), Type(TT_Unknown), SpacesRequiredBefore(0), - CanBreakBefore(false), ClosesTemplateDeclaration(false), - ParameterCount(0), PackingKind(PPK_Inconclusive), TotalLength(0), - UnbreakableTailLength(0), BindingStrength(0), SplitPenalty(0), - LongestObjCSelectorName(0), FakeRParens(0), LastInChainOfCalls(false), + IsUnterminatedLiteral(false), BlockKind(BK_Unknown), Type(TT_Unknown), + SpacesRequiredBefore(0), CanBreakBefore(false), + ClosesTemplateDeclaration(false), ParameterCount(0), + PackingKind(PPK_Inconclusive), TotalLength(0), UnbreakableTailLength(0), + BindingStrength(0), SplitPenalty(0), LongestObjCSelectorName(0), + FakeRParens(0), LastInChainOfCalls(false), PartOfMultiVariableDeclStmt(false), MatchingParen(NULL), Previous(NULL), Next(NULL) {} @@ -133,6 +134,9 @@ struct FormatToken { /// escaped newlines. StringRef TokenText; + /// \brief Set to \c true if this token is an unterminated literal. + bool IsUnterminatedLiteral; + /// \brief Contains the kind of block if this token is a brace. BraceBlockKind BlockKind; diff --git a/lib/Format/TokenAnnotator.cpp b/lib/Format/TokenAnnotator.cpp index 7a2364c0b9..6f6f468c2a 100644 --- a/lib/Format/TokenAnnotator.cpp +++ b/lib/Format/TokenAnnotator.cpp @@ -948,13 +948,14 @@ void TokenAnnotator::calculateFormattingInformation(AnnotatedLine &Line) { Current->SpacesRequiredBefore = spaceRequiredBefore(Line, *Current) ? 
1 : 0; - if (Current->MustBreakBefore) { - } else if (Current->is(tok::comment)) { + if (Current->is(tok::comment)) { Current->MustBreakBefore = Current->NewlinesBefore > 0; } else if (Current->Previous->isTrailingComment() || (Current->is(tok::string_literal) && Current->Previous->is(tok::string_literal))) { Current->MustBreakBefore = true; + } else if (Current->Previous->IsUnterminatedLiteral) { + Current->MustBreakBefore = true; } else if (Current->is(tok::lessless) && Current->Next && Current->Previous->is(tok::string_literal) && Current->Next->is(tok::string_literal)) { @@ -969,8 +970,6 @@ void TokenAnnotator::calculateFormattingInformation(AnnotatedLine &Line) { Current->getNextNonComment() && Current->getNextNonComment()->is(tok::string_literal)) { Current->MustBreakBefore = true; - } else { - Current->MustBreakBefore = false; } Current->CanBreakBefore = Current->MustBreakBefore || canBreakBefore(Line, *Current); diff --git a/unittests/Format/FormatTest.cpp b/unittests/Format/FormatTest.cpp index aa2a70987b..3d5c21a2f7 100644 --- a/unittests/Format/FormatTest.cpp +++ b/unittests/Format/FormatTest.cpp @@ -5004,6 +5004,11 @@ TEST_F(FormatTest, SkipsUnknownStringLiterals) { format("L\"unsupported literal\";", getGoogleStyleWithColumns(15))); EXPECT_EQ("R\"x(raw literal)x\";", format("R\"x(raw literal)x\";", getGoogleStyleWithColumns(15))); + verifyFormat("string a = \"unterminated;"); + EXPECT_EQ("function(\"unterminated,\n" + " OtherParameter);", + format("function( \"unterminated,\n" + " OtherParameter);")); } TEST_F(FormatTest, DoesNotTryToParseUDLiteralsInPreCpp11Code) { -- 2.40.0