From 712b7473f9de7cc1979c3754d03e15a2f492349e Mon Sep 17 00:00:00 2001 From: Alexander Kornienko Date: Wed, 7 Aug 2013 23:29:01 +0000 Subject: [PATCH] Support for double width characters. Summary: Only works for UTF-8-encoded files. Reviewers: djasper Reviewed By: djasper CC: cfe-commits, klimek Differential Revision: http://llvm-reviews.chandlerc.com/D1311 git-svn-id: https://llvm.org/svn/llvm-project/cfe/trunk@187935 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Format/BreakableToken.cpp | 29 +++++++++++++++++++--------- unittests/Format/FormatTest.cpp | 34 ++++++++++++++++----------------- 2 files changed, 37 insertions(+), 26 deletions(-) diff --git a/lib/Format/BreakableToken.cpp b/lib/Format/BreakableToken.cpp index 3b29137798..c2365f1402 100644 --- a/lib/Format/BreakableToken.cpp +++ b/lib/Format/BreakableToken.cpp @@ -20,6 +20,7 @@ #include "clang/Format/Format.h" #include "llvm/ADT/STLExtras.h" #include "llvm/Support/Debug.h" +#include "llvm/Support/Locale.h" #include namespace clang { @@ -38,6 +39,15 @@ static bool IsBlank(char C) { } } +static unsigned columnWidth(StringRef Text, encoding::Encoding Encoding) { + if (Encoding == encoding::Encoding_UTF8) { + int ContentWidth = llvm::sys::locale::columnWidth(Text); + if (ContentWidth >= 0) + return ContentWidth; + } + return encoding::getCodePointCount(Text, Encoding); +} + static BreakableToken::Split getCommentSplit(StringRef Text, unsigned ContentStartColumn, unsigned ColumnLimit, @@ -49,9 +59,12 @@ static BreakableToken::Split getCommentSplit(StringRef Text, unsigned MaxSplitBytes = 0; for (unsigned NumChars = 0; - NumChars < MaxSplit && MaxSplitBytes < Text.size(); ++NumChars) - MaxSplitBytes += + NumChars < MaxSplit && MaxSplitBytes < Text.size();) { + unsigned NumBytes = encoding::getCodePointNumBytes(Text[MaxSplitBytes], Encoding); + NumChars += columnWidth(Text.substr(MaxSplitBytes, NumBytes), Encoding); + MaxSplitBytes += NumBytes; + } StringRef::size_type SpaceOffset = Text.find_last_of(Blanks, MaxSplitBytes); if (SpaceOffset == StringRef::npos || @@ -84,9 +97,8 @@ static BreakableToken::Split getStringSplit(StringRef Text, return BreakableToken::Split(StringRef::npos, 0); if (ColumnLimit <= ContentStartColumn) return BreakableToken::Split(StringRef::npos, 0); - unsigned MaxSplit = - std::min(ColumnLimit - ContentStartColumn, - encoding::getCodePointCount(Text, Encoding) - 1); + unsigned MaxSplit = std::min(ColumnLimit - ContentStartColumn, + columnWidth(Text, Encoding) - 1); StringRef::size_type SpaceOffset = 0; StringRef::size_type SlashOffset = 0; StringRef::size_type WordStartOffset = 0; @@ -98,7 +110,7 @@ static BreakableToken::Split getStringSplit(StringRef Text, Chars += Advance; } else { Advance = encoding::getCodePointNumBytes(Text[0], Encoding); - Chars += 1; + Chars += columnWidth(Text.substr(0, Advance), Encoding); } if (Chars > MaxSplit) @@ -131,7 +143,7 @@ unsigned BreakableSingleLineToken::getLineCount() const { return 1; } unsigned BreakableSingleLineToken::getLineLengthAfterSplit( unsigned LineIndex, unsigned Offset, StringRef::size_type Length) const { return StartColumn + Prefix.size() + Postfix.size() + - encoding::getCodePointCount(Line.substr(Offset, Length), Encoding); + columnWidth(Line.substr(Offset, Length), Encoding); } BreakableSingleLineToken::BreakableSingleLineToken( @@ -329,8 +341,7 @@ unsigned BreakableBlockComment::getLineCount() const { return Lines.size(); } unsigned BreakableBlockComment::getLineLengthAfterSplit( unsigned LineIndex, unsigned Offset, StringRef::size_type Length) const { return getContentStartColumn(LineIndex, Offset) + - encoding::getCodePointCount(Lines[LineIndex].substr(Offset, Length), - Encoding) + + columnWidth(Lines[LineIndex].substr(Offset, Length), Encoding) + // The last line gets a "*/" postfix. (LineIndex + 1 == Lines.size() ? 2 : 0); } diff --git a/unittests/Format/FormatTest.cpp b/unittests/Format/FormatTest.cpp index 2969e568eb..48b1f63fca 100644 --- a/unittests/Format/FormatTest.cpp +++ b/unittests/Format/FormatTest.cpp @@ -5704,15 +5704,15 @@ TEST_F(FormatTest, CountsUTF8CharactersProperly) { verifyFormat("\"Однажды в студёную зимнюю пору...\"", getLLVMStyleWithColumns(35)); verifyFormat("\"一 二 三 四 五 六 七 八 九 十\"", - getLLVMStyleWithColumns(21)); + getLLVMStyleWithColumns(31)); verifyFormat("// Однажды в студёную зимнюю пору...", getLLVMStyleWithColumns(36)); verifyFormat("// 一 二 三 四 五 六 七 八 九 十", - getLLVMStyleWithColumns(22)); + getLLVMStyleWithColumns(32)); verifyFormat("/* Однажды в студёную зимнюю пору... */", getLLVMStyleWithColumns(39)); verifyFormat("/* 一 二 三 四 五 六 七 八 九 十 */", - getLLVMStyleWithColumns(25)); + getLLVMStyleWithColumns(35)); } TEST_F(FormatTest, SplitsUTF8Strings) { @@ -5723,11 +5723,12 @@ TEST_F(FormatTest, SplitsUTF8Strings) { "\"пору,\"", format("\"Однажды, в студёную зимнюю пору,\"", getLLVMStyleWithColumns(13))); - EXPECT_EQ("\"一 二 三 四 \"\n" - "\"五 六 七 八 \"\n" - "\"九 十\"", - format("\"一 二 三 四 五 六 七 八 九 十\"", - getLLVMStyleWithColumns(10))); + EXPECT_EQ("\"一 二 三 \"\n" + "\"四 五六 \"\n" + "\"七 八 九 \"\n" + "\"十\"", + format("\"一 二 三 四 五六 七 八 九 十\"", + getLLVMStyleWithColumns(11))); } TEST_F(FormatTest, SplitsUTF8LineComments) { @@ -5739,9 +5740,9 @@ TEST_F(FormatTest, SplitsUTF8LineComments) { getLLVMStyleWithColumns(13))); EXPECT_EQ("// 一二三\n" "// 四五六七\n" - "// 八\n" - "// 九 十", - format("// 一二三 四五六七 八 九 十", getLLVMStyleWithColumns(6))); + "// 八 九\n" + "// 十", + format("// 一二三 四五六七 八 九 十", getLLVMStyleWithColumns(9))); } TEST_F(FormatTest, SplitsUTF8BlockComments) { @@ -5758,16 +5759,17 @@ TEST_F(FormatTest, SplitsUTF8BlockComments) { getLLVMStyleWithColumns(13))); EXPECT_EQ("/* 一二三\n" " * 四五六七\n" - " * 八\n" - " * 九 十\n" - " */", - format("/* 一二三 四五六七 八 九 十 */", getLLVMStyleWithColumns(6))); + " * 八 九\n" + " * 十 */", + format("/* 一二三 四五六七 八 九 十 */", getLLVMStyleWithColumns(9))); EXPECT_EQ("/* 𝓣𝓮𝓼𝓽 𝔣𝔬𝔲𝔯\n" " * 𝕓𝕪𝕥𝕖\n" " * 𝖀𝕿𝕱-𝟠 */", format("/* 𝓣𝓮𝓼𝓽 𝔣𝔬𝔲𝔯 𝕓𝕪𝕥𝕖 𝖀𝕿𝕱-𝟠 */", getLLVMStyleWithColumns(12))); } +#endif // _MSC_VER + TEST_F(FormatTest, FormatsWithWebKitStyle) { FormatStyle Style = getWebKitStyle(); @@ -5847,7 +5849,5 @@ TEST_F(FormatTest, FormatsWithWebKitStyle) { format("if (aaaaaaaaaaaaaaa || bbbbbbbbbbbbbbb) { i++; }", Style)); } -#endif - } // end namespace tooling } // end namespace clang -- 2.40.0