From: Alexander Kornienko Date: Mon, 2 Sep 2013 13:58:14 +0000 (+0000) Subject: Store first and last newline position in the token text for string literals and comments. X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=4b762a91faac0473fa380ea9731992b24867e2e6;p=clang Store first and last newline position in the token text for string literals and comments. Summary: Store first and last newline position in the token text for string literals and comments to avoid doing .find('\n') for each possible solution. Reviewers: djasper Reviewed By: djasper CC: cfe-commits, klimek Differential Revision: http://llvm-reviews.chandlerc.com/D1556 git-svn-id: https://llvm.org/svn/llvm-project/cfe/trunk@189758 91177308-0d34-0410-b5e6-96231b3b80d8 --- diff --git a/lib/Format/ContinuationIndenter.cpp b/lib/Format/ContinuationIndenter.cpp index 878580e892..9891cb2e09 100644 --- a/lib/Format/ContinuationIndenter.cpp +++ b/lib/Format/ContinuationIndenter.cpp @@ -583,23 +583,16 @@ unsigned ContinuationIndenter::moveStateToNextToken(LineState &State, unsigned ContinuationIndenter::addMultilineStringLiteral(const FormatToken &Current, LineState &State) { - StringRef Text = Current.TokenText; - // We can only affect layout of the first and the last line, so the penalty - // for all other lines is constant, and we ignore it. - size_t FirstLineBreak = Text.find('\n'); - size_t LastLineBreak = Text.find_last_of('\n'); - assert(FirstLineBreak != StringRef::npos); - unsigned StartColumn = State.Column - Current.CodePointCount; - State.Column = - encoding::getCodePointCount(Text.substr(LastLineBreak + 1), Encoding); - // Break before further function parameters on all levels. for (unsigned i = 0, e = State.Stack.size(); i != e; ++i) State.Stack[i].BreakBeforeParameter = true; unsigned ColumnsUsed = - StartColumn + - encoding::getCodePointCount(Text.substr(0, FirstLineBreak), Encoding); + State.Column - Current.CodePointCount + Current.CodePointsInFirstLine; + // We can only affect layout of the first and the last line, so the penalty + // for all other lines is constant, and we ignore it. + State.Column = Current.CodePointsInLastLine; + if (ColumnsUsed > getColumnLimit()) return Style.PenaltyExcessCharacter * (ColumnsUsed - getColumnLimit()); return 0; @@ -619,7 +612,7 @@ unsigned ContinuationIndenter::breakProtrudingToken(const FormatToken &Current, // Don't break string literals with (in case of non-raw strings, escaped) // newlines. As clang-format must not change the string's content, it is // unlikely that we'll end up with a better format. - if (Current.IsMultiline) + if (Current.isMultiline()) return addMultilineStringLiteral(Current, State); // Only break up default narrow strings. @@ -649,14 +642,8 @@ unsigned ContinuationIndenter::breakProtrudingToken(const FormatToken &Current, // FIXME: If we want to handle them correctly, we'll need to adjust // leading whitespace in consecutive lines when changing indentation of // the first line similar to what we do with block comments. - if (Current.IsMultiline) { - StringRef::size_type EscapedNewlinePos = Current.TokenText.find("\\\n"); - assert(EscapedNewlinePos != StringRef::npos); - State.Column = - StartColumn + - encoding::getCodePointCount( - Current.TokenText.substr(0, EscapedNewlinePos), Encoding) + - 1; + if (Current.isMultiline()) { + State.Column = StartColumn + Current.CodePointsInFirstLine; return 0; } @@ -740,7 +727,7 @@ bool ContinuationIndenter::NextIsMultilineString(const LineState &State) { // AlwaysBreakBeforeMultilineStrings implementation. if (Current.TokenText.startswith("R\"")) return false; - if (Current.IsMultiline) + if (Current.isMultiline()) return true; if (Current.getNextNonComment() && Current.getNextNonComment()->is(tok::string_literal)) diff --git a/lib/Format/Format.cpp b/lib/Format/Format.cpp index 84bf36c7fd..39d2c0f96c 100644 --- a/lib/Format/Format.cpp +++ b/lib/Format/Format.cpp @@ -596,10 +596,16 @@ private: FormatTok->CodePointCount = encoding::getCodePointCount(FormatTok->TokenText, Encoding); - if (FormatTok->isOneOf(tok::string_literal, tok::comment) && - FormatTok->TokenText.find('\n') != StringRef::npos) - FormatTok->IsMultiline = true; - + if (FormatTok->isOneOf(tok::string_literal, tok::comment)) { + StringRef Text = FormatTok->TokenText; + size_t FirstNewlinePos = Text.find('\n'); + if (FirstNewlinePos != StringRef::npos) { + FormatTok->CodePointsInFirstLine = encoding::getCodePointCount( + Text.substr(0, FirstNewlinePos), Encoding); + FormatTok->CodePointsInLastLine = encoding::getCodePointCount( + Text.substr(Text.find_last_of('\n') + 1), Encoding); + } + } // FIXME: Add the CodePointCount to Column. FormatTok->WhitespaceRange = SourceRange( WhitespaceStart, WhitespaceStart.getLocWithOffset(WhitespaceLength)); diff --git a/lib/Format/FormatToken.h b/lib/Format/FormatToken.h index 6006ec87c0..24d4c59e61 100644 --- a/lib/Format/FormatToken.h +++ b/lib/Format/FormatToken.h @@ -80,9 +80,9 @@ class TokenRole; /// whitespace characters preceeding it. struct FormatToken { FormatToken() - : NewlinesBefore(0), HasUnescapedNewline(false), IsMultiline(false), - LastNewlineOffset(0), CodePointCount(0), IsFirst(false), - MustBreakBefore(false), IsUnterminatedLiteral(false), + : NewlinesBefore(0), HasUnescapedNewline(false), LastNewlineOffset(0), + CodePointCount(0), CodePointsInFirstLine(0), CodePointsInLastLine(0), + IsFirst(false), MustBreakBefore(false), IsUnterminatedLiteral(false), BlockKind(BK_Unknown), Type(TT_Unknown), SpacesRequiredBefore(0), CanBreakBefore(false), ClosesTemplateDeclaration(false), ParameterCount(0), PackingKind(PPK_Inconclusive), TotalLength(0), @@ -104,9 +104,6 @@ struct FormatToken { /// Token. bool HasUnescapedNewline; - /// \brief Whether the token text contains newlines (escaped or not). - bool IsMultiline; - /// \brief The range of the whitespace immediately preceeding the \c Token. SourceRange WhitespaceRange; @@ -118,6 +115,19 @@ struct FormatToken { /// We need this to correctly measure number of columns a token spans. unsigned CodePointCount; + /// \brief Contains the number of code points in the first line of a + /// multi-line string literal or comment. Zero if there's no newline in the + /// token. + unsigned CodePointsInFirstLine; + + /// \brief Contains the number of code points in the last line of a + /// multi-line string literal or comment. Can be zero for line comments. + unsigned CodePointsInLastLine; + + /// \brief Returns \c true if the token text contains newlines (escaped or + /// not). + bool isMultiline() const { return CodePointsInFirstLine != 0; } + /// \brief Indicates that this is the first token. bool IsFirst; diff --git a/lib/Format/TokenAnnotator.cpp b/lib/Format/TokenAnnotator.cpp index 5b9802ddd6..38a1c1a5e1 100644 --- a/lib/Format/TokenAnnotator.cpp +++ b/lib/Format/TokenAnnotator.cpp @@ -1025,7 +1025,7 @@ void TokenAnnotator::calculateFormattingInformation(AnnotatedLine &Line) { Current->CanBreakBefore = Current->MustBreakBefore || canBreakBefore(Line, *Current); if (Current->MustBreakBefore || - (Current->is(tok::string_literal) && Current->IsMultiline)) + (Current->is(tok::string_literal) && Current->isMultiline())) Current->TotalLength = Current->Previous->TotalLength + Style.ColumnLimit; else Current->TotalLength = Current->Previous->TotalLength +