From: Alexander Kornienko Date: Thu, 5 Sep 2013 14:08:34 +0000 (+0000) Subject: Handle zero-width and double-width characters in string literals and comments. X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=0b62cc30c9aa462184de0435dc083d944a41d67f;p=clang Handle zero-width and double-width characters in string literals and comments. Summary: Count column width instead of the number of code points. This also includes correct handling of tabs inside string literals and comments (with the exception of multiline string literals/comments, where tabs are present before the first escaped newline). Reviewers: djasper, klimek Reviewed By: klimek CC: cfe-commits, klimek Differential Revision: http://llvm-reviews.chandlerc.com/D1601 git-svn-id: https://llvm.org/svn/llvm-project/cfe/trunk@190052 91177308-0d34-0410-b5e6-96231b3b80d8 --- diff --git a/include/clang/Format/Format.h b/include/clang/Format/Format.h index 6874d9e7a8..6c6575f0b7 100644 --- a/include/clang/Format/Format.h +++ b/include/clang/Format/Format.h @@ -149,9 +149,12 @@ struct FormatStyle { /// Otherwise puts them into the right-most column. bool AlignEscapedNewlinesLeft; - /// \brief The number of characters to use for indentation. + /// \brief The number of columns to use for indentation. unsigned IndentWidth; + /// \brief The number of columns used for tab stops. + unsigned TabWidth; + /// \brief The number of characters to use for indentation of constructor /// initializer lists. 
unsigned ConstructorInitializerIndentWidth; diff --git a/lib/Format/BreakableToken.cpp b/lib/Format/BreakableToken.cpp index 3b29137798..053be4b025 100644 --- a/lib/Format/BreakableToken.cpp +++ b/lib/Format/BreakableToken.cpp @@ -41,6 +41,7 @@ static bool IsBlank(char C) { static BreakableToken::Split getCommentSplit(StringRef Text, unsigned ContentStartColumn, unsigned ColumnLimit, + unsigned TabWidth, encoding::Encoding Encoding) { if (ColumnLimit <= ContentStartColumn + 1) return BreakableToken::Split(StringRef::npos, 0); @@ -49,9 +50,14 @@ static BreakableToken::Split getCommentSplit(StringRef Text, unsigned MaxSplitBytes = 0; for (unsigned NumChars = 0; - NumChars < MaxSplit && MaxSplitBytes < Text.size(); ++NumChars) - MaxSplitBytes += + NumChars < MaxSplit && MaxSplitBytes < Text.size();) { + unsigned BytesInChar = encoding::getCodePointNumBytes(Text[MaxSplitBytes], Encoding); + NumChars += + encoding::columnWidthWithTabs(Text.substr(MaxSplitBytes, BytesInChar), + ContentStartColumn, TabWidth, Encoding); + MaxSplitBytes += BytesInChar; + } StringRef::size_type SpaceOffset = Text.find_last_of(Blanks, MaxSplitBytes); if (SpaceOffset == StringRef::npos || @@ -78,6 +84,7 @@ static BreakableToken::Split getCommentSplit(StringRef Text, static BreakableToken::Split getStringSplit(StringRef Text, unsigned ContentStartColumn, unsigned ColumnLimit, + unsigned TabWidth, encoding::Encoding Encoding) { // FIXME: Reduce unit test case. 
if (Text.empty()) @@ -86,7 +93,9 @@ static BreakableToken::Split getStringSplit(StringRef Text, return BreakableToken::Split(StringRef::npos, 0); unsigned MaxSplit = std::min(ColumnLimit - ContentStartColumn, - encoding::getCodePointCount(Text, Encoding) - 1); + encoding::columnWidthWithTabs(Text, ContentStartColumn, + TabWidth, Encoding) - + 1); StringRef::size_type SpaceOffset = 0; StringRef::size_type SlashOffset = 0; StringRef::size_type WordStartOffset = 0; @@ -98,7 +107,9 @@ static BreakableToken::Split getStringSplit(StringRef Text, Chars += Advance; } else { Advance = encoding::getCodePointNumBytes(Text[0], Encoding); - Chars += 1; + Chars += encoding::columnWidthWithTabs(Text.substr(0, Advance), + ContentStartColumn + Chars, + TabWidth, Encoding); } if (Chars > MaxSplit) @@ -131,14 +142,17 @@ unsigned BreakableSingleLineToken::getLineCount() const { return 1; } unsigned BreakableSingleLineToken::getLineLengthAfterSplit( unsigned LineIndex, unsigned Offset, StringRef::size_type Length) const { return StartColumn + Prefix.size() + Postfix.size() + - encoding::getCodePointCount(Line.substr(Offset, Length), Encoding); + encoding::columnWidthWithTabs(Line.substr(Offset, Length), + StartColumn + Prefix.size(), + Style.TabWidth, Encoding); } BreakableSingleLineToken::BreakableSingleLineToken( const FormatToken &Tok, unsigned StartColumn, StringRef Prefix, - StringRef Postfix, bool InPPDirective, encoding::Encoding Encoding) - : BreakableToken(Tok, InPPDirective, Encoding), StartColumn(StartColumn), - Prefix(Prefix), Postfix(Postfix) { + StringRef Postfix, bool InPPDirective, encoding::Encoding Encoding, + const FormatStyle &Style) + : BreakableToken(Tok, InPPDirective, Encoding, Style), + StartColumn(StartColumn), Prefix(Prefix), Postfix(Postfix) { assert(Tok.TokenText.startswith(Prefix) && Tok.TokenText.endswith(Postfix)); Line = Tok.TokenText.substr( Prefix.size(), Tok.TokenText.size() - Prefix.size() - Postfix.size()); @@ -147,15 +161,16 @@ 
BreakableSingleLineToken::BreakableSingleLineToken( BreakableStringLiteral::BreakableStringLiteral(const FormatToken &Tok, unsigned StartColumn, bool InPPDirective, - encoding::Encoding Encoding) + encoding::Encoding Encoding, + const FormatStyle &Style) : BreakableSingleLineToken(Tok, StartColumn, "\"", "\"", InPPDirective, - Encoding) {} + Encoding, Style) {} BreakableToken::Split BreakableStringLiteral::getSplit(unsigned LineIndex, unsigned TailOffset, unsigned ColumnLimit) const { return getStringSplit(Line.substr(TailOffset), StartColumn + 2, ColumnLimit, - Encoding); + Style.TabWidth, Encoding); } void BreakableStringLiteral::insertBreak(unsigned LineIndex, @@ -177,10 +192,11 @@ static StringRef getLineCommentPrefix(StringRef Comment) { BreakableLineComment::BreakableLineComment(const FormatToken &Token, unsigned StartColumn, bool InPPDirective, - encoding::Encoding Encoding) + encoding::Encoding Encoding, + const FormatStyle &Style) : BreakableSingleLineToken(Token, StartColumn, getLineCommentPrefix(Token.TokenText), "", - InPPDirective, Encoding) { + InPPDirective, Encoding, Style) { OriginalPrefix = Prefix; if (Token.TokenText.size() > Prefix.size() && isAlphanumeric(Token.TokenText[Prefix.size()])) { @@ -195,7 +211,7 @@ BreakableToken::Split BreakableLineComment::getSplit(unsigned LineIndex, unsigned TailOffset, unsigned ColumnLimit) const { return getCommentSplit(Line.substr(TailOffset), StartColumn + Prefix.size(), - ColumnLimit, Encoding); + ColumnLimit, Style.TabWidth, Encoding); } void BreakableLineComment::insertBreak(unsigned LineIndex, unsigned TailOffset, @@ -216,10 +232,10 @@ BreakableLineComment::replaceWhitespaceBefore(unsigned LineIndex, } BreakableBlockComment::BreakableBlockComment( - const FormatStyle &Style, const FormatToken &Token, unsigned StartColumn, + const FormatToken &Token, unsigned StartColumn, unsigned OriginalStartColumn, bool FirstInLine, bool InPPDirective, - encoding::Encoding Encoding) - : BreakableToken(Token, 
InPPDirective, Encoding) { + encoding::Encoding Encoding, const FormatStyle &Style) + : BreakableToken(Token, InPPDirective, Encoding, Style) { StringRef TokenText(Token.TokenText); assert(TokenText.startswith("/*") && TokenText.endswith("*/")); TokenText.substr(2, TokenText.size() - 4).split(Lines, "\n"); @@ -229,7 +245,7 @@ BreakableBlockComment::BreakableBlockComment( StartOfLineColumn.resize(Lines.size()); StartOfLineColumn[0] = StartColumn + 2; for (size_t i = 1; i < Lines.size(); ++i) - adjustWhitespace(Style, i, IndentDelta); + adjustWhitespace(i, IndentDelta); Decoration = "* "; if (Lines.size() == 1 && !FirstInLine) { @@ -282,8 +298,7 @@ BreakableBlockComment::BreakableBlockComment( }); } -void BreakableBlockComment::adjustWhitespace(const FormatStyle &Style, - unsigned LineIndex, +void BreakableBlockComment::adjustWhitespace(unsigned LineIndex, int IndentDelta) { // When in a preprocessor directive, the trailing backslash in a block comment // is not needed, but can serve a purpose of uniformity with necessary escaped @@ -306,6 +321,7 @@ void BreakableBlockComment::adjustWhitespace(const FormatStyle &Style, if (StartOfLine == StringRef::npos) StartOfLine = Lines[LineIndex].size(); + StringRef Whitespace = Lines[LineIndex].substr(0, StartOfLine); // Adjust Lines to only contain relevant text. Lines[LineIndex - 1] = Lines[LineIndex - 1].substr(0, EndOfPreviousLine); Lines[LineIndex] = Lines[LineIndex].substr(StartOfLine); @@ -321,16 +337,19 @@ void BreakableBlockComment::adjustWhitespace(const FormatStyle &Style, // if leading tabs are intermixed with spaces, that is not a high priority. // Adjust the start column uniformly across all lines. 
- StartOfLineColumn[LineIndex] = std::max(0, StartOfLine + IndentDelta); + StartOfLineColumn[LineIndex] = + std::max(0, Whitespace.size() + IndentDelta); } unsigned BreakableBlockComment::getLineCount() const { return Lines.size(); } unsigned BreakableBlockComment::getLineLengthAfterSplit( unsigned LineIndex, unsigned Offset, StringRef::size_type Length) const { - return getContentStartColumn(LineIndex, Offset) + - encoding::getCodePointCount(Lines[LineIndex].substr(Offset, Length), - Encoding) + + unsigned ContentStartColumn = getContentStartColumn(LineIndex, Offset); + return ContentStartColumn + + encoding::columnWidthWithTabs(Lines[LineIndex].substr(Offset, Length), + ContentStartColumn, Style.TabWidth, + Encoding) + // The last line gets a "*/" postfix. (LineIndex + 1 == Lines.size() ? 2 : 0); } @@ -340,7 +359,7 @@ BreakableBlockComment::getSplit(unsigned LineIndex, unsigned TailOffset, unsigned ColumnLimit) const { return getCommentSplit(Lines[LineIndex].substr(TailOffset), getContentStartColumn(LineIndex, TailOffset), - ColumnLimit, Encoding); + ColumnLimit, Style.TabWidth, Encoding); } void BreakableBlockComment::insertBreak(unsigned LineIndex, unsigned TailOffset, diff --git a/lib/Format/BreakableToken.h b/lib/Format/BreakableToken.h index 90b78ac03a..65b9015251 100644 --- a/lib/Format/BreakableToken.h +++ b/lib/Format/BreakableToken.h @@ -67,12 +67,14 @@ public: protected: BreakableToken(const FormatToken &Tok, bool InPPDirective, - encoding::Encoding Encoding) - : Tok(Tok), InPPDirective(InPPDirective), Encoding(Encoding) {} + encoding::Encoding Encoding, const FormatStyle &Style) + : Tok(Tok), InPPDirective(InPPDirective), Encoding(Encoding), + Style(Style) {} const FormatToken &Tok; const bool InPPDirective; const encoding::Encoding Encoding; + const FormatStyle &Style; }; /// \brief Base class for single line tokens that can be broken. 
@@ -88,7 +90,8 @@ public: protected: BreakableSingleLineToken(const FormatToken &Tok, unsigned StartColumn, StringRef Prefix, StringRef Postfix, - bool InPPDirective, encoding::Encoding Encoding); + bool InPPDirective, encoding::Encoding Encoding, + const FormatStyle &Style); // The column in which the token starts. unsigned StartColumn; @@ -107,7 +110,8 @@ public: /// \p StartColumn specifies the column in which the token will start /// after formatting. BreakableStringLiteral(const FormatToken &Tok, unsigned StartColumn, - bool InPPDirective, encoding::Encoding Encoding); + bool InPPDirective, encoding::Encoding Encoding, + const FormatStyle &Style); virtual Split getSplit(unsigned LineIndex, unsigned TailOffset, unsigned ColumnLimit) const; @@ -122,7 +126,8 @@ public: /// \p StartColumn specifies the column in which the comment will start /// after formatting. BreakableLineComment(const FormatToken &Token, unsigned StartColumn, - bool InPPDirective, encoding::Encoding Encoding); + bool InPPDirective, encoding::Encoding Encoding, + const FormatStyle &Style); virtual Split getSplit(unsigned LineIndex, unsigned TailOffset, unsigned ColumnLimit) const; @@ -144,10 +149,10 @@ public: /// after formatting, while \p OriginalStartColumn specifies in which /// column the comment started before formatting. /// If the comment starts a line after formatting, set \p FirstInLine to true. 
- BreakableBlockComment(const FormatStyle &Style, const FormatToken &Token, - unsigned StartColumn, unsigned OriginaStartColumn, - bool FirstInLine, bool InPPDirective, - encoding::Encoding Encoding); + BreakableBlockComment(const FormatToken &Token, unsigned StartColumn, + unsigned OriginaStartColumn, bool FirstInLine, + bool InPPDirective, encoding::Encoding Encoding, + const FormatStyle &Style); virtual unsigned getLineCount() const; virtual unsigned getLineLengthAfterSplit(unsigned LineIndex, @@ -172,8 +177,7 @@ private: // Sets StartOfLineColumn to the intended column in which the text at // Lines[LineIndex] starts (note that the decoration, if present, is not // considered part of the text). - void adjustWhitespace(const FormatStyle &Style, unsigned LineIndex, - int IndentDelta); + void adjustWhitespace(unsigned LineIndex, int IndentDelta); // Returns the column at which the text in line LineIndex starts, when broken // at TailOffset. Note that the decoration (if present) is not considered part diff --git a/lib/Format/ContinuationIndenter.cpp b/lib/Format/ContinuationIndenter.cpp index c894a4b29e..9e84ea770f 100644 --- a/lib/Format/ContinuationIndenter.cpp +++ b/lib/Format/ContinuationIndenter.cpp @@ -623,10 +623,10 @@ ContinuationIndenter::addMultilineStringLiteral(const FormatToken &Current, State.Stack[i].BreakBeforeParameter = true; unsigned ColumnsUsed = - State.Column - Current.CodePointCount + Current.CodePointsInFirstLine; + State.Column - Current.CodePointCount + Current.FirstLineColumnWidth; // We can only affect layout of the first and the last line, so the penalty // for all other lines is constant, and we ignore it. 
- State.Column = Current.CodePointsInLastLine; + State.Column = Current.LastLineColumnWidth; if (ColumnsUsed > getColumnLimit(State)) return Style.PenaltyExcessCharacter * (ColumnsUsed - getColumnLimit(State)); @@ -659,14 +659,14 @@ unsigned ContinuationIndenter::breakProtrudingToken(const FormatToken &Current, return 0; Token.reset(new BreakableStringLiteral( - Current, StartColumn, State.Line->InPPDirective, Encoding)); + Current, StartColumn, State.Line->InPPDirective, Encoding, Style)); } else if (Current.Type == TT_BlockComment && Current.isTrailingComment()) { unsigned OriginalStartColumn = SourceMgr.getSpellingColumnNumber(Current.getStartOfNonWhitespace()) - 1; Token.reset(new BreakableBlockComment( - Style, Current, StartColumn, OriginalStartColumn, !Current.Previous, - State.Line->InPPDirective, Encoding)); + Current, StartColumn, OriginalStartColumn, !Current.Previous, + State.Line->InPPDirective, Encoding, Style)); } else if (Current.Type == TT_LineComment && (Current.Previous == NULL || Current.Previous->Type != TT_ImplicitStringLiteral)) { @@ -678,12 +678,12 @@ unsigned ContinuationIndenter::breakProtrudingToken(const FormatToken &Current, // leading whitespace in consecutive lines when changing indentation of // the first line similar to what we do with block comments. 
if (Current.isMultiline()) { - State.Column = StartColumn + Current.CodePointsInFirstLine; + State.Column = StartColumn + Current.FirstLineColumnWidth; return 0; } - Token.reset(new BreakableLineComment(Current, StartColumn, - State.Line->InPPDirective, Encoding)); + Token.reset(new BreakableLineComment( + Current, StartColumn, State.Line->InPPDirective, Encoding, Style)); } else { return 0; } diff --git a/lib/Format/Encoding.h b/lib/Format/Encoding.h index e9e9ae71c7..356334d537 100644 --- a/lib/Format/Encoding.h +++ b/lib/Format/Encoding.h @@ -18,6 +18,7 @@ #include "clang/Basic/LLVM.h" #include "llvm/Support/ConvertUTF.h" +#include "llvm/Support/Unicode.h" namespace clang { namespace format { @@ -57,6 +58,37 @@ inline unsigned getCodePointCount(StringRef Text, Encoding Encoding) { } } +/// \brief Returns the number of columns required to display the \p Text on a +/// generic Unicode-capable terminal. Text is assumed to use the specified +/// \p Encoding. +inline unsigned columnWidth(StringRef Text, Encoding Encoding) { + if (Encoding == Encoding_UTF8) { + int ContentWidth = llvm::sys::unicode::columnWidthUTF8(Text); + if (ContentWidth >= 0) + return ContentWidth; + } + return Text.size(); +} + +/// \brief Returns the number of columns required to display the \p Text, +/// starting from the \p StartColumn on a terminal with the \p TabWidth. The +/// text is assumed to use the specified \p Encoding. 
+inline unsigned columnWidthWithTabs(StringRef Text, unsigned StartColumn, + unsigned TabWidth, Encoding Encoding) { + unsigned TotalWidth = 0; + StringRef Tail = Text; + for (;;) { + StringRef::size_type TabPos = Tail.find('\t'); + if (TabPos == StringRef::npos) + return TotalWidth + columnWidth(Tail, Encoding); + int Width = columnWidth(Tail.substr(0, TabPos), Encoding); + assert(Width >= 0); + TotalWidth += Width; + TotalWidth += TabWidth - (TotalWidth + StartColumn) % TabWidth; + Tail = Tail.substr(TabPos + 1); + } +} + /// \brief Gets the number of bytes in a sequence representing a single /// codepoint and starting with FirstChar in the specified Encoding. inline unsigned getCodePointNumBytes(char FirstChar, Encoding Encoding) { diff --git a/lib/Format/Format.cpp b/lib/Format/Format.cpp index c482c402ec..02adc5acd6 100644 --- a/lib/Format/Format.cpp +++ b/lib/Format/Format.cpp @@ -136,6 +136,7 @@ template <> struct MappingTraits { IO.mapOptional("Cpp11BracedListStyle", Style.Cpp11BracedListStyle); IO.mapOptional("Standard", Style.Standard); IO.mapOptional("IndentWidth", Style.IndentWidth); + IO.mapOptional("TabWidth", Style.TabWidth); IO.mapOptional("UseTab", Style.UseTab); IO.mapOptional("BreakBeforeBraces", Style.BreakBeforeBraces); IO.mapOptional("IndentFunctionDeclarationAfterType", @@ -184,6 +185,7 @@ FormatStyle getLLVMStyle() { LLVMStyle.IndentCaseLabels = false; LLVMStyle.IndentFunctionDeclarationAfterType = false; LLVMStyle.IndentWidth = 2; + LLVMStyle.TabWidth = 8; LLVMStyle.MaxEmptyLinesToKeep = 1; LLVMStyle.NamespaceIndentation = FormatStyle::NI_None; LLVMStyle.ObjCSpaceBeforeProtocolList = true; @@ -225,6 +227,7 @@ FormatStyle getGoogleStyle() { GoogleStyle.IndentCaseLabels = true; GoogleStyle.IndentFunctionDeclarationAfterType = true; GoogleStyle.IndentWidth = 2; + GoogleStyle.TabWidth = 8; GoogleStyle.MaxEmptyLinesToKeep = 1; GoogleStyle.NamespaceIndentation = FormatStyle::NI_None; GoogleStyle.ObjCSpaceBeforeProtocolList = false; @@ -629,7 
+632,7 @@ private: ++Column; break; case '\t': - Column += Style.IndentWidth - Column % Style.IndentWidth; + Column += Style.TabWidth - Column % Style.TabWidth; break; default: ++Column; @@ -681,10 +684,12 @@ private: StringRef Text = FormatTok->TokenText; size_t FirstNewlinePos = Text.find('\n'); if (FirstNewlinePos != StringRef::npos) { - FormatTok->CodePointsInFirstLine = encoding::getCodePointCount( - Text.substr(0, FirstNewlinePos), Encoding); - FormatTok->CodePointsInLastLine = encoding::getCodePointCount( - Text.substr(Text.find_last_of('\n') + 1), Encoding); + // FIXME: Handle embedded tabs. + FormatTok->FirstLineColumnWidth = encoding::columnWidthWithTabs( + Text.substr(0, FirstNewlinePos), 0, Style.TabWidth, Encoding); + FormatTok->LastLineColumnWidth = encoding::columnWidthWithTabs( + Text.substr(Text.find_last_of('\n') + 1), 0, Style.TabWidth, + Encoding); } } // FIXME: Add the CodePointCount to Column. diff --git a/lib/Format/FormatToken.h b/lib/Format/FormatToken.h index 0b770f30e6..e4342dd131 100644 --- a/lib/Format/FormatToken.h +++ b/lib/Format/FormatToken.h @@ -83,7 +83,7 @@ class AnnotatedLine; struct FormatToken { FormatToken() : NewlinesBefore(0), HasUnescapedNewline(false), LastNewlineOffset(0), - CodePointCount(0), CodePointsInFirstLine(0), CodePointsInLastLine(0), + CodePointCount(0), FirstLineColumnWidth(0), LastLineColumnWidth(0), IsFirst(false), MustBreakBefore(false), IsUnterminatedLiteral(false), BlockKind(BK_Unknown), Type(TT_Unknown), SpacesRequiredBefore(0), CanBreakBefore(false), ClosesTemplateDeclaration(false), @@ -120,15 +120,15 @@ struct FormatToken { /// \brief Contains the width in columns of the first line of a /// multi-line string literal or comment. Zero if there's no newline in the /// token. - unsigned CodePointsInFirstLine; + unsigned FirstLineColumnWidth; /// \brief Contains the width in columns of the last line of a /// multi-line string literal or comment. Can be zero for line comments. 
- unsigned CodePointsInLastLine; + unsigned LastLineColumnWidth; /// \brief Returns \c true if the token text contains newlines (escaped or /// not). - bool isMultiline() const { return CodePointsInFirstLine != 0; } + bool isMultiline() const { return FirstLineColumnWidth != 0; } /// \brief Indicates that this is the first token. bool IsFirst; diff --git a/lib/Format/WhitespaceManager.cpp b/lib/Format/WhitespaceManager.cpp index 41519b64c4..0f46e62563 100644 --- a/lib/Format/WhitespaceManager.cpp +++ b/lib/Format/WhitespaceManager.cpp @@ -272,8 +272,8 @@ std::string WhitespaceManager::getIndentText(unsigned Spaces) { if (!Style.UseTab) return std::string(Spaces, ' '); - return std::string(Spaces / Style.IndentWidth, '\t') + - std::string(Spaces % Style.IndentWidth, ' '); + return std::string(Spaces / Style.TabWidth, '\t') + + std::string(Spaces % Style.TabWidth, ' '); } } // namespace format diff --git a/unittests/Format/FormatTest.cpp b/unittests/Format/FormatTest.cpp index 4c66ef0c28..2307b03f51 100644 --- a/unittests/Format/FormatTest.cpp +++ b/unittests/Format/FormatTest.cpp @@ -5638,9 +5638,41 @@ TEST_F(FormatTest, ConfigurableUseOfTab) { "}", 21, 0, Tab)); + Tab.TabWidth = 4; + Tab.IndentWidth = 8; + verifyFormat("class TabWidth4Indent8 {\n" + "\t\tvoid f() {\n" + "\t\t\t\tsomeFunction(parameter1,\n" + "\t\t\t\t\t\t\t parameter2);\n" + "\t\t}\n" + "};", + Tab); + + Tab.TabWidth = 4; + Tab.IndentWidth = 4; + verifyFormat("class TabWidth4Indent4 {\n" + "\tvoid f() {\n" + "\t\tsomeFunction(parameter1,\n" + "\t\t\t\t\t parameter2);\n" + "\t}\n" + "};", + Tab); + + Tab.TabWidth = 8; + Tab.IndentWidth = 4; + verifyFormat("class TabWidth8Indent4 {\n" + " void f() {\n" + "\tsomeFunction(parameter1,\n" + "\t\t parameter2);\n" + " }\n" + "};", + Tab); + // FIXME: To correctly count mixed whitespace we need to // also correctly count mixed whitespace in front of the comment. 
- // + + // Tab.TabWidth = 8; + // Tab.IndentWidth = 8; // EXPECT_EQ("/*\n" // "\t a\t\tcomment\n" // "\t in multiple lines\n" @@ -6074,15 +6106,15 @@ TEST_F(FormatTest, CountsUTF8CharactersProperly) { verifyFormat("\"Однажды в студёную зимнюю пору...\"", getLLVMStyleWithColumns(35)); verifyFormat("\"一 二 三 四 五 六 七 八 九 十\"", - getLLVMStyleWithColumns(21)); + getLLVMStyleWithColumns(31)); verifyFormat("// Однажды в студёную зимнюю пору...", getLLVMStyleWithColumns(36)); verifyFormat("// 一 二 三 四 五 六 七 八 九 十", - getLLVMStyleWithColumns(22)); + getLLVMStyleWithColumns(32)); verifyFormat("/* Однажды в студёную зимнюю пору... */", getLLVMStyleWithColumns(39)); verifyFormat("/* 一 二 三 四 五 六 七 八 九 十 */", - getLLVMStyleWithColumns(25)); + getLLVMStyleWithColumns(35)); } TEST_F(FormatTest, SplitsUTF8Strings) { @@ -6093,11 +6125,29 @@ TEST_F(FormatTest, SplitsUTF8Strings) { "\"пору,\"", format("\"Однажды, в студёную зимнюю пору,\"", getLLVMStyleWithColumns(13))); - EXPECT_EQ("\"一 二 三 四 \"\n" - "\"五 六 七 八 \"\n" - "\"九 十\"", - format("\"一 二 三 四 五 六 七 八 九 十\"", - getLLVMStyleWithColumns(10))); + EXPECT_EQ("\"一 二 三 \"\n" + "\"四 五六 \"\n" + "\"七 八 九 \"\n" + "\"十\"", + format("\"一 二 三 四 五六 七 八 九 十\"", + getLLVMStyleWithColumns(11))); + EXPECT_EQ("\"一\t二 \"\n" + "\"\t三 \"\n" + "\"四 五\t六 \"\n" + "\"\t七 \"\n" + "\"八九十\tqq\"", + format("\"一\t二 \t三 四 五\t六 \t七 八九十\tqq\"", + getLLVMStyleWithColumns(11))); +} + + +TEST_F(FormatTest, HandlesDoubleWidthCharsInMultiLineStrings) { + EXPECT_EQ("const char *sssss =\n" + " \"一二三四五六七八\\\n" + " 九 十\";", + format("const char *sssss = \"一二三四五六七八\\\n" + " 九 十\";", + getLLVMStyleWithColumns(30))); } TEST_F(FormatTest, SplitsUTF8LineComments) { @@ -6109,9 +6159,9 @@ TEST_F(FormatTest, SplitsUTF8LineComments) { getLLVMStyleWithColumns(13))); EXPECT_EQ("// 一二三\n" "// 四五六七\n" - "// 八\n" - "// 九 十", - format("// 一二三 四五六七 八 九 十", getLLVMStyleWithColumns(6))); + "// 八 九\n" + "// 十", + format("// 一二三 四五六七 八 九 十", getLLVMStyleWithColumns(9))); } TEST_F(FormatTest, 
SplitsUTF8BlockComments) { @@ -6126,18 +6176,20 @@ TEST_F(FormatTest, SplitsUTF8BlockComments) { format("/* Гляжу, поднимается медленно в гору\n" " * Лошадка, везущая хворосту воз. */", getLLVMStyleWithColumns(13))); - EXPECT_EQ("/* 一二三\n" - " * 四五六七\n" - " * 八\n" - " * 九 十\n" - " */", - format("/* 一二三 四五六七 八 九 十 */", getLLVMStyleWithColumns(6))); + EXPECT_EQ( + "/* 一二三\n" + " * 四五六七\n" + " * 八 九\n" + " * 十 */", + format("/* 一二三 四五六七 八 九 十 */", getLLVMStyleWithColumns(9))); EXPECT_EQ("/* 𝓣𝓮𝓼𝓽 𝔣𝔬𝔲𝔯\n" " * 𝕓𝕪𝕥𝕖\n" " * 𝖀𝕿𝕱-𝟠 */", format("/* 𝓣𝓮𝓼𝓽 𝔣𝔬𝔲𝔯 𝕓𝕪𝕥𝕖 𝖀𝕿𝕱-𝟠 */", getLLVMStyleWithColumns(12))); } +#endif // _MSC_VER + TEST_F(FormatTest, ConstructorInitializerIndentWidth) { FormatStyle Style = getLLVMStyle(); @@ -6185,8 +6237,6 @@ TEST_F(FormatTest, ConstructorInitializerIndentWidth) { Style); } -#endif - TEST_F(FormatTest, FormatsWithWebKitStyle) { FormatStyle Style = getWebKitStyle();