From: Alexander Kornienko <alexfh@google.com>
Date: Thu, 5 Sep 2013 14:08:34 +0000 (+0000)
Subject: Handle zero-width and double-width characters in string literals and comments.
X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=0b62cc30c9aa462184de0435dc083d944a41d67f;p=clang

Handle zero-width and double-width characters in string literals and comments.

Summary:
Count display columns instead of code points when measuring string literals
and comments. This also handles tabs inside string literals and comments
correctly, with one exception: multiline string literals/comments that contain
tabs before the first escaped newline.

Reviewers: djasper, klimek

Reviewed By: klimek

CC: cfe-commits, klimek

Differential Revision: http://llvm-reviews.chandlerc.com/D1601

git-svn-id: https://llvm.org/svn/llvm-project/cfe/trunk@190052 91177308-0d34-0410-b5e6-96231b3b80d8
---

diff --git a/include/clang/Format/Format.h b/include/clang/Format/Format.h
index 6874d9e7a8..6c6575f0b7 100644
--- a/include/clang/Format/Format.h
+++ b/include/clang/Format/Format.h
@@ -149,9 +149,12 @@ struct FormatStyle {
   /// Otherwise puts them into the right-most column.
   bool AlignEscapedNewlinesLeft;
 
-  /// \brief The number of characters to use for indentation.
+  /// \brief The number of columns to use for indentation.
   unsigned IndentWidth;
 
+  /// \brief The number of columns used for tab stops.
+  unsigned TabWidth;
+
   /// \brief The number of characters to use for indentation of constructor
   /// initializer lists.
   unsigned ConstructorInitializerIndentWidth;
diff --git a/lib/Format/BreakableToken.cpp b/lib/Format/BreakableToken.cpp
index 3b29137798..053be4b025 100644
--- a/lib/Format/BreakableToken.cpp
+++ b/lib/Format/BreakableToken.cpp
@@ -41,6 +41,7 @@ static bool IsBlank(char C) {
 static BreakableToken::Split getCommentSplit(StringRef Text,
                                              unsigned ContentStartColumn,
                                              unsigned ColumnLimit,
+                                             unsigned TabWidth,
                                              encoding::Encoding Encoding) {
   if (ColumnLimit <= ContentStartColumn + 1)
     return BreakableToken::Split(StringRef::npos, 0);
@@ -49,9 +50,14 @@ static BreakableToken::Split getCommentSplit(StringRef Text,
   unsigned MaxSplitBytes = 0;
 
   for (unsigned NumChars = 0;
-       NumChars < MaxSplit && MaxSplitBytes < Text.size(); ++NumChars)
-    MaxSplitBytes +=
+       NumChars < MaxSplit && MaxSplitBytes < Text.size();) {
+    unsigned BytesInChar =
         encoding::getCodePointNumBytes(Text[MaxSplitBytes], Encoding);
+    NumChars +=
+        encoding::columnWidthWithTabs(Text.substr(MaxSplitBytes, BytesInChar),
+                                      ContentStartColumn, TabWidth, Encoding);
+    MaxSplitBytes += BytesInChar;
+  }
 
   StringRef::size_type SpaceOffset = Text.find_last_of(Blanks, MaxSplitBytes);
   if (SpaceOffset == StringRef::npos ||
@@ -78,6 +84,7 @@ static BreakableToken::Split getCommentSplit(StringRef Text,
 static BreakableToken::Split getStringSplit(StringRef Text,
                                             unsigned ContentStartColumn,
                                             unsigned ColumnLimit,
+                                            unsigned TabWidth,
                                             encoding::Encoding Encoding) {
   // FIXME: Reduce unit test case.
   if (Text.empty())
@@ -86,7 +93,9 @@ static BreakableToken::Split getStringSplit(StringRef Text,
     return BreakableToken::Split(StringRef::npos, 0);
   unsigned MaxSplit =
       std::min<unsigned>(ColumnLimit - ContentStartColumn,
-                         encoding::getCodePointCount(Text, Encoding) - 1);
+                         encoding::columnWidthWithTabs(Text, ContentStartColumn,
+                                                       TabWidth, Encoding) -
+                             1);
   StringRef::size_type SpaceOffset = 0;
   StringRef::size_type SlashOffset = 0;
   StringRef::size_type WordStartOffset = 0;
@@ -98,7 +107,9 @@ static BreakableToken::Split getStringSplit(StringRef Text,
       Chars += Advance;
     } else {
       Advance = encoding::getCodePointNumBytes(Text[0], Encoding);
-      Chars += 1;
+      Chars += encoding::columnWidthWithTabs(Text.substr(0, Advance),
+                                             ContentStartColumn + Chars,
+                                             TabWidth, Encoding);
     }
 
     if (Chars > MaxSplit)
@@ -131,14 +142,17 @@ unsigned BreakableSingleLineToken::getLineCount() const { return 1; }
 unsigned BreakableSingleLineToken::getLineLengthAfterSplit(
     unsigned LineIndex, unsigned Offset, StringRef::size_type Length) const {
   return StartColumn + Prefix.size() + Postfix.size() +
-         encoding::getCodePointCount(Line.substr(Offset, Length), Encoding);
+         encoding::columnWidthWithTabs(Line.substr(Offset, Length),
+                                       StartColumn + Prefix.size(),
+                                       Style.TabWidth, Encoding);
 }
 
 BreakableSingleLineToken::BreakableSingleLineToken(
     const FormatToken &Tok, unsigned StartColumn, StringRef Prefix,
-    StringRef Postfix, bool InPPDirective, encoding::Encoding Encoding)
-    : BreakableToken(Tok, InPPDirective, Encoding), StartColumn(StartColumn),
-      Prefix(Prefix), Postfix(Postfix) {
+    StringRef Postfix, bool InPPDirective, encoding::Encoding Encoding,
+    const FormatStyle &Style)
+    : BreakableToken(Tok, InPPDirective, Encoding, Style),
+      StartColumn(StartColumn), Prefix(Prefix), Postfix(Postfix) {
   assert(Tok.TokenText.startswith(Prefix) && Tok.TokenText.endswith(Postfix));
   Line = Tok.TokenText.substr(
       Prefix.size(), Tok.TokenText.size() - Prefix.size() - Postfix.size());
@@ -147,15 +161,16 @@ BreakableSingleLineToken::BreakableSingleLineToken(
 BreakableStringLiteral::BreakableStringLiteral(const FormatToken &Tok,
                                                unsigned StartColumn,
                                                bool InPPDirective,
-                                               encoding::Encoding Encoding)
+                                               encoding::Encoding Encoding,
+                                               const FormatStyle &Style)
     : BreakableSingleLineToken(Tok, StartColumn, "\"", "\"", InPPDirective,
-                               Encoding) {}
+                               Encoding, Style) {}
 
 BreakableToken::Split
 BreakableStringLiteral::getSplit(unsigned LineIndex, unsigned TailOffset,
                                  unsigned ColumnLimit) const {
   return getStringSplit(Line.substr(TailOffset), StartColumn + 2, ColumnLimit,
-                        Encoding);
+                        Style.TabWidth, Encoding);
 }
 
 void BreakableStringLiteral::insertBreak(unsigned LineIndex,
@@ -177,10 +192,11 @@ static StringRef getLineCommentPrefix(StringRef Comment) {
 BreakableLineComment::BreakableLineComment(const FormatToken &Token,
                                            unsigned StartColumn,
                                            bool InPPDirective,
-                                           encoding::Encoding Encoding)
+                                           encoding::Encoding Encoding,
+                                           const FormatStyle &Style)
     : BreakableSingleLineToken(Token, StartColumn,
                                getLineCommentPrefix(Token.TokenText), "",
-                               InPPDirective, Encoding) {
+                               InPPDirective, Encoding, Style) {
   OriginalPrefix = Prefix;
   if (Token.TokenText.size() > Prefix.size() &&
       isAlphanumeric(Token.TokenText[Prefix.size()])) {
@@ -195,7 +211,7 @@ BreakableToken::Split
 BreakableLineComment::getSplit(unsigned LineIndex, unsigned TailOffset,
                                unsigned ColumnLimit) const {
   return getCommentSplit(Line.substr(TailOffset), StartColumn + Prefix.size(),
-                         ColumnLimit, Encoding);
+                         ColumnLimit, Style.TabWidth, Encoding);
 }
 
 void BreakableLineComment::insertBreak(unsigned LineIndex, unsigned TailOffset,
@@ -216,10 +232,10 @@ BreakableLineComment::replaceWhitespaceBefore(unsigned LineIndex,
 }
 
 BreakableBlockComment::BreakableBlockComment(
-    const FormatStyle &Style, const FormatToken &Token, unsigned StartColumn,
+    const FormatToken &Token, unsigned StartColumn,
     unsigned OriginalStartColumn, bool FirstInLine, bool InPPDirective,
-    encoding::Encoding Encoding)
-    : BreakableToken(Token, InPPDirective, Encoding) {
+    encoding::Encoding Encoding, const FormatStyle &Style)
+    : BreakableToken(Token, InPPDirective, Encoding, Style) {
   StringRef TokenText(Token.TokenText);
   assert(TokenText.startswith("/*") && TokenText.endswith("*/"));
   TokenText.substr(2, TokenText.size() - 4).split(Lines, "\n");
@@ -229,7 +245,7 @@ BreakableBlockComment::BreakableBlockComment(
   StartOfLineColumn.resize(Lines.size());
   StartOfLineColumn[0] = StartColumn + 2;
   for (size_t i = 1; i < Lines.size(); ++i)
-    adjustWhitespace(Style, i, IndentDelta);
+    adjustWhitespace(i, IndentDelta);
 
   Decoration = "* ";
   if (Lines.size() == 1 && !FirstInLine) {
@@ -282,8 +298,7 @@ BreakableBlockComment::BreakableBlockComment(
   });
 }
 
-void BreakableBlockComment::adjustWhitespace(const FormatStyle &Style,
-                                             unsigned LineIndex,
+void BreakableBlockComment::adjustWhitespace(unsigned LineIndex,
                                              int IndentDelta) {
   // When in a preprocessor directive, the trailing backslash in a block comment
   // is not needed, but can serve a purpose of uniformity with necessary escaped
@@ -306,6 +321,7 @@ void BreakableBlockComment::adjustWhitespace(const FormatStyle &Style,
   if (StartOfLine == StringRef::npos)
     StartOfLine = Lines[LineIndex].size();
 
+  StringRef Whitespace = Lines[LineIndex].substr(0, StartOfLine);
   // Adjust Lines to only contain relevant text.
   Lines[LineIndex - 1] = Lines[LineIndex - 1].substr(0, EndOfPreviousLine);
   Lines[LineIndex] = Lines[LineIndex].substr(StartOfLine);
@@ -321,16 +337,19 @@ void BreakableBlockComment::adjustWhitespace(const FormatStyle &Style,
   // if leading tabs are intermixed with spaces, that is not a high priority.
 
   // Adjust the start column uniformly accross all lines.
-  StartOfLineColumn[LineIndex] = std::max<int>(0, StartOfLine + IndentDelta);
+  StartOfLineColumn[LineIndex] =
+      std::max<int>(0, Whitespace.size() + IndentDelta);
 }
 
 unsigned BreakableBlockComment::getLineCount() const { return Lines.size(); }
 
 unsigned BreakableBlockComment::getLineLengthAfterSplit(
     unsigned LineIndex, unsigned Offset, StringRef::size_type Length) const {
-  return getContentStartColumn(LineIndex, Offset) +
-         encoding::getCodePointCount(Lines[LineIndex].substr(Offset, Length),
-                                     Encoding) +
+  unsigned ContentStartColumn = getContentStartColumn(LineIndex, Offset);
+  return ContentStartColumn +
+         encoding::columnWidthWithTabs(Lines[LineIndex].substr(Offset, Length),
+                                       ContentStartColumn, Style.TabWidth,
+                                       Encoding) +
          // The last line gets a "*/" postfix.
          (LineIndex + 1 == Lines.size() ? 2 : 0);
 }
@@ -340,7 +359,7 @@ BreakableBlockComment::getSplit(unsigned LineIndex, unsigned TailOffset,
                                 unsigned ColumnLimit) const {
   return getCommentSplit(Lines[LineIndex].substr(TailOffset),
                          getContentStartColumn(LineIndex, TailOffset),
-                         ColumnLimit, Encoding);
+                         ColumnLimit, Style.TabWidth, Encoding);
 }
 
 void BreakableBlockComment::insertBreak(unsigned LineIndex, unsigned TailOffset,
diff --git a/lib/Format/BreakableToken.h b/lib/Format/BreakableToken.h
index 90b78ac03a..65b9015251 100644
--- a/lib/Format/BreakableToken.h
+++ b/lib/Format/BreakableToken.h
@@ -67,12 +67,14 @@ public:
 
 protected:
   BreakableToken(const FormatToken &Tok, bool InPPDirective,
-                 encoding::Encoding Encoding)
-      : Tok(Tok), InPPDirective(InPPDirective), Encoding(Encoding) {}
+                 encoding::Encoding Encoding, const FormatStyle &Style)
+      : Tok(Tok), InPPDirective(InPPDirective), Encoding(Encoding),
+        Style(Style) {}
 
   const FormatToken &Tok;
   const bool InPPDirective;
   const encoding::Encoding Encoding;
+  const FormatStyle &Style;
 };
 
 /// \brief Base class for single line tokens that can be broken.
@@ -88,7 +90,8 @@ public:
 protected:
   BreakableSingleLineToken(const FormatToken &Tok, unsigned StartColumn,
                            StringRef Prefix, StringRef Postfix,
-                           bool InPPDirective, encoding::Encoding Encoding);
+                           bool InPPDirective, encoding::Encoding Encoding,
+                           const FormatStyle &Style);
 
   // The column in which the token starts.
   unsigned StartColumn;
@@ -107,7 +110,8 @@ public:
   /// \p StartColumn specifies the column in which the token will start
   /// after formatting.
   BreakableStringLiteral(const FormatToken &Tok, unsigned StartColumn,
-                         bool InPPDirective, encoding::Encoding Encoding);
+                         bool InPPDirective, encoding::Encoding Encoding,
+                         const FormatStyle &Style);
 
   virtual Split getSplit(unsigned LineIndex, unsigned TailOffset,
                          unsigned ColumnLimit) const;
@@ -122,7 +126,8 @@ public:
   /// \p StartColumn specifies the column in which the comment will start
   /// after formatting.
   BreakableLineComment(const FormatToken &Token, unsigned StartColumn,
-                       bool InPPDirective, encoding::Encoding Encoding);
+                       bool InPPDirective, encoding::Encoding Encoding,
+                       const FormatStyle &Style);
 
   virtual Split getSplit(unsigned LineIndex, unsigned TailOffset,
                          unsigned ColumnLimit) const;
@@ -144,10 +149,10 @@ public:
   /// after formatting, while \p OriginalStartColumn specifies in which
   /// column the comment started before formatting.
   /// If the comment starts a line after formatting, set \p FirstInLine to true.
-  BreakableBlockComment(const FormatStyle &Style, const FormatToken &Token,
-                        unsigned StartColumn, unsigned OriginaStartColumn,
-                        bool FirstInLine, bool InPPDirective,
-                        encoding::Encoding Encoding);
+  BreakableBlockComment(const FormatToken &Token, unsigned StartColumn,
+                        unsigned OriginaStartColumn, bool FirstInLine,
+                        bool InPPDirective, encoding::Encoding Encoding,
+                        const FormatStyle &Style);
 
   virtual unsigned getLineCount() const;
   virtual unsigned getLineLengthAfterSplit(unsigned LineIndex,
@@ -172,8 +177,7 @@ private:
   // Sets StartOfLineColumn to the intended column in which the text at
   // Lines[LineIndex] starts (note that the decoration, if present, is not
   // considered part of the text).
-  void adjustWhitespace(const FormatStyle &Style, unsigned LineIndex,
-                        int IndentDelta);
+  void adjustWhitespace(unsigned LineIndex, int IndentDelta);
 
   // Returns the column at which the text in line LineIndex starts, when broken
   // at TailOffset. Note that the decoration (if present) is not considered part
diff --git a/lib/Format/ContinuationIndenter.cpp b/lib/Format/ContinuationIndenter.cpp
index c894a4b29e..9e84ea770f 100644
--- a/lib/Format/ContinuationIndenter.cpp
+++ b/lib/Format/ContinuationIndenter.cpp
@@ -623,10 +623,10 @@ ContinuationIndenter::addMultilineStringLiteral(const FormatToken &Current,
     State.Stack[i].BreakBeforeParameter = true;
 
   unsigned ColumnsUsed =
-      State.Column - Current.CodePointCount + Current.CodePointsInFirstLine;
+      State.Column - Current.CodePointCount + Current.FirstLineColumnWidth;
   // We can only affect layout of the first and the last line, so the penalty
   // for all other lines is constant, and we ignore it.
-  State.Column = Current.CodePointsInLastLine;
+  State.Column = Current.LastLineColumnWidth;
 
   if (ColumnsUsed > getColumnLimit(State))
     return Style.PenaltyExcessCharacter * (ColumnsUsed - getColumnLimit(State));
@@ -659,14 +659,14 @@ unsigned ContinuationIndenter::breakProtrudingToken(const FormatToken &Current,
       return 0;
 
     Token.reset(new BreakableStringLiteral(
-        Current, StartColumn, State.Line->InPPDirective, Encoding));
+        Current, StartColumn, State.Line->InPPDirective, Encoding, Style));
   } else if (Current.Type == TT_BlockComment && Current.isTrailingComment()) {
     unsigned OriginalStartColumn =
         SourceMgr.getSpellingColumnNumber(Current.getStartOfNonWhitespace()) -
         1;
     Token.reset(new BreakableBlockComment(
-        Style, Current, StartColumn, OriginalStartColumn, !Current.Previous,
-        State.Line->InPPDirective, Encoding));
+        Current, StartColumn, OriginalStartColumn, !Current.Previous,
+        State.Line->InPPDirective, Encoding, Style));
   } else if (Current.Type == TT_LineComment &&
              (Current.Previous == NULL ||
               Current.Previous->Type != TT_ImplicitStringLiteral)) {
@@ -678,12 +678,12 @@ unsigned ContinuationIndenter::breakProtrudingToken(const FormatToken &Current,
     // leading whitespace in consecutive lines when changing indentation of
     // the first line similar to what we do with block comments.
     if (Current.isMultiline()) {
-      State.Column = StartColumn + Current.CodePointsInFirstLine;
+      State.Column = StartColumn + Current.FirstLineColumnWidth;
       return 0;
     }
 
-    Token.reset(new BreakableLineComment(Current, StartColumn,
-                                         State.Line->InPPDirective, Encoding));
+    Token.reset(new BreakableLineComment(
+        Current, StartColumn, State.Line->InPPDirective, Encoding, Style));
   } else {
     return 0;
   }
diff --git a/lib/Format/Encoding.h b/lib/Format/Encoding.h
index e9e9ae71c7..356334d537 100644
--- a/lib/Format/Encoding.h
+++ b/lib/Format/Encoding.h
@@ -18,6 +18,7 @@
 
 #include "clang/Basic/LLVM.h"
 #include "llvm/Support/ConvertUTF.h"
+#include "llvm/Support/Unicode.h"
 
 namespace clang {
 namespace format {
@@ -57,6 +58,37 @@ inline unsigned getCodePointCount(StringRef Text, Encoding Encoding) {
   }
 }
 
+/// \brief Returns the number of columns required to display the \p Text on a
+/// generic Unicode-capable terminal. For UTF-8 this is columnWidthUTF8's
+/// result; otherwise, or if that result is negative, it is the byte count.
+inline unsigned columnWidth(StringRef Text, Encoding Encoding) {
+  if (Encoding == Encoding_UTF8) {
+    int ContentWidth = llvm::sys::unicode::columnWidthUTF8(Text);
+    if (ContentWidth >= 0)
+      return ContentWidth;
+  }
+  return Text.size();
+}
+
+/// \brief Returns the number of columns required to display the \p Text,
+/// starting at \p StartColumn, with tab stops every \p TabWidth columns. The
+/// text is assumed to use the specified \p Encoding.
+inline unsigned columnWidthWithTabs(StringRef Text, unsigned StartColumn,
+                                    unsigned TabWidth, Encoding Encoding) {
+  unsigned TotalWidth = 0;
+  StringRef Tail = Text;
+  for (;;) {
+    StringRef::size_type TabPos = Tail.find('\t');
+    if (TabPos == StringRef::npos)
+      return TotalWidth + columnWidth(Tail, Encoding);
+    TotalWidth += columnWidth(Tail.substr(0, TabPos), Encoding);
+    // A zero TabWidth would make the modulo undefined; such tabs add no width.
+    if (TabWidth)
+      TotalWidth += TabWidth - (TotalWidth + StartColumn) % TabWidth;
+    Tail = Tail.substr(TabPos + 1);
+  }
+}
+
 /// \brief Gets the number of bytes in a sequence representing a single
 /// codepoint and starting with FirstChar in the specified Encoding.
 inline unsigned getCodePointNumBytes(char FirstChar, Encoding Encoding) {
diff --git a/lib/Format/Format.cpp b/lib/Format/Format.cpp
index c482c402ec..02adc5acd6 100644
--- a/lib/Format/Format.cpp
+++ b/lib/Format/Format.cpp
@@ -136,6 +136,7 @@ template <> struct MappingTraits<clang::format::FormatStyle> {
     IO.mapOptional("Cpp11BracedListStyle", Style.Cpp11BracedListStyle);
     IO.mapOptional("Standard", Style.Standard);
     IO.mapOptional("IndentWidth", Style.IndentWidth);
+    IO.mapOptional("TabWidth", Style.TabWidth);
     IO.mapOptional("UseTab", Style.UseTab);
     IO.mapOptional("BreakBeforeBraces", Style.BreakBeforeBraces);
     IO.mapOptional("IndentFunctionDeclarationAfterType",
@@ -184,6 +185,7 @@ FormatStyle getLLVMStyle() {
   LLVMStyle.IndentCaseLabels = false;
   LLVMStyle.IndentFunctionDeclarationAfterType = false;
   LLVMStyle.IndentWidth = 2;
+  LLVMStyle.TabWidth = 8;
   LLVMStyle.MaxEmptyLinesToKeep = 1;
   LLVMStyle.NamespaceIndentation = FormatStyle::NI_None;
   LLVMStyle.ObjCSpaceBeforeProtocolList = true;
@@ -225,6 +227,7 @@ FormatStyle getGoogleStyle() {
   GoogleStyle.IndentCaseLabels = true;
   GoogleStyle.IndentFunctionDeclarationAfterType = true;
   GoogleStyle.IndentWidth = 2;
+  GoogleStyle.TabWidth = 8;
   GoogleStyle.MaxEmptyLinesToKeep = 1;
   GoogleStyle.NamespaceIndentation = FormatStyle::NI_None;
   GoogleStyle.ObjCSpaceBeforeProtocolList = false;
@@ -629,7 +632,7 @@ private:
           ++Column;
           break;
         case '\t':
-          Column += Style.IndentWidth - Column % Style.IndentWidth;
+          Column += Style.TabWidth - Column % Style.TabWidth;
           break;
         default:
           ++Column;
@@ -681,10 +684,12 @@ private:
       StringRef Text = FormatTok->TokenText;
       size_t FirstNewlinePos = Text.find('\n');
       if (FirstNewlinePos != StringRef::npos) {
-        FormatTok->CodePointsInFirstLine = encoding::getCodePointCount(
-            Text.substr(0, FirstNewlinePos), Encoding);
-        FormatTok->CodePointsInLastLine = encoding::getCodePointCount(
-            Text.substr(Text.find_last_of('\n') + 1), Encoding);
+        // FIXME: Handle embedded tabs.
+        FormatTok->FirstLineColumnWidth = encoding::columnWidthWithTabs(
+            Text.substr(0, FirstNewlinePos), 0, Style.TabWidth, Encoding);
+        FormatTok->LastLineColumnWidth = encoding::columnWidthWithTabs(
+            Text.substr(Text.find_last_of('\n') + 1), 0, Style.TabWidth,
+            Encoding);
       }
     }
     // FIXME: Add the CodePointCount to Column.
diff --git a/lib/Format/FormatToken.h b/lib/Format/FormatToken.h
index 0b770f30e6..e4342dd131 100644
--- a/lib/Format/FormatToken.h
+++ b/lib/Format/FormatToken.h
@@ -83,7 +83,7 @@ class AnnotatedLine;
 struct FormatToken {
   FormatToken()
       : NewlinesBefore(0), HasUnescapedNewline(false), LastNewlineOffset(0),
-        CodePointCount(0), CodePointsInFirstLine(0), CodePointsInLastLine(0),
+        CodePointCount(0), FirstLineColumnWidth(0), LastLineColumnWidth(0),
         IsFirst(false), MustBreakBefore(false), IsUnterminatedLiteral(false),
         BlockKind(BK_Unknown), Type(TT_Unknown), SpacesRequiredBefore(0),
         CanBreakBefore(false), ClosesTemplateDeclaration(false),
@@ -120,15 +120,15 @@ struct FormatToken {
-  /// \brief Contains the number of code points in the first line of a
+  /// \brief Contains the width in columns of the first line of a
   /// multi-line string literal or comment. Zero if there's no newline in the
   /// token.
-  unsigned CodePointsInFirstLine;
+  unsigned FirstLineColumnWidth;
 
-  /// \brief Contains the number of code points in the last line of a
+  /// \brief Contains the width in columns of the last line of a
   /// multi-line string literal or comment. Can be zero for line comments.
-  unsigned CodePointsInLastLine;
+  unsigned LastLineColumnWidth;
 
   /// \brief Returns \c true if the token text contains newlines (escaped or
   /// not).
-  bool isMultiline() const { return CodePointsInFirstLine != 0; }
+  bool isMultiline() const { return FirstLineColumnWidth != 0; }
 
   /// \brief Indicates that this is the first token.
   bool IsFirst;
diff --git a/lib/Format/WhitespaceManager.cpp b/lib/Format/WhitespaceManager.cpp
index 41519b64c4..0f46e62563 100644
--- a/lib/Format/WhitespaceManager.cpp
+++ b/lib/Format/WhitespaceManager.cpp
@@ -272,8 +272,8 @@ std::string WhitespaceManager::getIndentText(unsigned Spaces) {
   if (!Style.UseTab)
     return std::string(Spaces, ' ');
 
-  return std::string(Spaces / Style.IndentWidth, '\t') +
-         std::string(Spaces % Style.IndentWidth, ' ');
+  return std::string(Spaces / Style.TabWidth, '\t') +
+         std::string(Spaces % Style.TabWidth, ' ');
 }
 
 } // namespace format
diff --git a/unittests/Format/FormatTest.cpp b/unittests/Format/FormatTest.cpp
index 4c66ef0c28..2307b03f51 100644
--- a/unittests/Format/FormatTest.cpp
+++ b/unittests/Format/FormatTest.cpp
@@ -5638,9 +5638,41 @@ TEST_F(FormatTest, ConfigurableUseOfTab) {
                    "}",
                    21, 0, Tab));
 
+  Tab.TabWidth = 4;
+  Tab.IndentWidth = 8;
+  verifyFormat("class TabWidth4Indent8 {\n"
+               "\t\tvoid f() {\n"
+               "\t\t\t\tsomeFunction(parameter1,\n"
+               "\t\t\t\t\t\t\t parameter2);\n"
+               "\t\t}\n"
+               "};",
+               Tab);
+
+  Tab.TabWidth = 4;
+  Tab.IndentWidth = 4;
+  verifyFormat("class TabWidth4Indent4 {\n"
+               "\tvoid f() {\n"
+               "\t\tsomeFunction(parameter1,\n"
+               "\t\t\t\t\t parameter2);\n"
+               "\t}\n"
+               "};",
+               Tab);
+
+  Tab.TabWidth = 8;
+  Tab.IndentWidth = 4;
+  verifyFormat("class TabWidth8Indent4 {\n"
+               "    void f() {\n"
+               "\tsomeFunction(parameter1,\n"
+               "\t\t     parameter2);\n"
+               "    }\n"
+               "};",
+               Tab);
+
   // FIXME: To correctly count mixed whitespace we need to
   // also correctly count mixed whitespace in front of the comment.
-  //
+
+  // Tab.TabWidth = 8;
+  // Tab.IndentWidth = 8;
   // EXPECT_EQ("/*\n"
   //           "\t      a\t\tcomment\n"
   //           "\t      in multiple lines\n"
@@ -6074,15 +6106,15 @@ TEST_F(FormatTest, CountsUTF8CharactersProperly) {
   verifyFormat("\"Однажды в студёную зимнюю пору...\"",
                getLLVMStyleWithColumns(35));
   verifyFormat("\"一 二 三 四 五 六 七 八 九 十\"",
-               getLLVMStyleWithColumns(21));
+               getLLVMStyleWithColumns(31));
   verifyFormat("// Однажды в студёную зимнюю пору...",
                getLLVMStyleWithColumns(36));
   verifyFormat("// 一 二 三 四 五 六 七 八 九 十",
-               getLLVMStyleWithColumns(22));
+               getLLVMStyleWithColumns(32));
   verifyFormat("/* Однажды в студёную зимнюю пору... */",
                getLLVMStyleWithColumns(39));
   verifyFormat("/* 一 二 三 四 五 六 七 八 九 十 */",
-               getLLVMStyleWithColumns(25));
+               getLLVMStyleWithColumns(35));
 }
 
 TEST_F(FormatTest, SplitsUTF8Strings) {
@@ -6093,11 +6125,29 @@ TEST_F(FormatTest, SplitsUTF8Strings) {
       "\"Ð¿Ð¾ÑÑ,\"",
       format("\"Однажды, в студёную зимнюю пору,\"",
              getLLVMStyleWithColumns(13)));
-  EXPECT_EQ("\"一 二 三 四 \"\n"
-            "\"五 六 七 八 \"\n"
-            "\"九 十\"",
-            format("\"一 二 三 四 五 六 七 八 九 十\"",
-                   getLLVMStyleWithColumns(10)));
+  EXPECT_EQ("\"一 二 三 \"\n"
+            "\"四 五六 \"\n"
+            "\"七 八 九 \"\n"
+            "\"十\"",
+            format("\"一 二 三 四 五六 七 八 九 十\"",
+                   getLLVMStyleWithColumns(11)));
+  EXPECT_EQ("\"一\t二 \"\n"
+            "\"\t三 \"\n"
+            "\"四 五\t六 \"\n"
+            "\"\t七 \"\n"
+            "\"八九十\tqq\"",
+            format("\"一\t二 \t三 四 五\t六 \t七 八九十\tqq\"",
+                   getLLVMStyleWithColumns(11)));
+}
+
+
+TEST_F(FormatTest, HandlesDoubleWidthCharsInMultiLineStrings) {
+  EXPECT_EQ("const char *sssss =\n"
+            "    \"一二三四五六七八\\\n"
+            " 九 十\";",
+            format("const char *sssss = \"一二三四五六七八\\\n"
+                   " 九 十\";",
+                   getLLVMStyleWithColumns(30)));
 }
 
 TEST_F(FormatTest, SplitsUTF8LineComments) {
@@ -6109,9 +6159,9 @@ TEST_F(FormatTest, SplitsUTF8LineComments) {
                    getLLVMStyleWithColumns(13)));
   EXPECT_EQ("// 一二三\n"
             "// 四五六七\n"
-            "// 八\n"
-            "// 九 十",
-            format("// 一二三 四五六七 八  九 十", getLLVMStyleWithColumns(6)));
+            "// 八  九\n"
+            "// 十",
+            format("// 一二三 四五六七 八  九 十", getLLVMStyleWithColumns(9)));
 }
 
 TEST_F(FormatTest, SplitsUTF8BlockComments) {
@@ -6126,18 +6176,20 @@ TEST_F(FormatTest, SplitsUTF8BlockComments) {
             format("/* Гляжу, поднимается медленно в гору\n"
                    " * Лошадка, везущая хворосту воз. */",
                    getLLVMStyleWithColumns(13)));
-  EXPECT_EQ("/* 一二三\n"
-            " * 四五六七\n"
-            " * 八\n"
-            " * 九 十\n"
-            " */",
-            format("/* 一二三 四五六七 八  九 十 */", getLLVMStyleWithColumns(6)));
+  EXPECT_EQ(
+      "/* 一二三\n"
+      " * 四五六七\n"
+      " * 八  九\n"
+      " * 十  */",
+      format("/* 一二三 四五六七 八  九 十  */", getLLVMStyleWithColumns(9)));
   EXPECT_EQ("/* 𝓣𝓮𝓼𝓽 𝔣𝔬𝔲𝔯\n"
             " * 𝕓𝕪𝕥𝕖\n"
             " * 𝖀𝕿𝕱-𝟠 */",
             format("/* 𝓣𝓮𝓼𝓽 𝔣𝔬𝔲𝔯 𝕓𝕪𝕥𝕖 𝖀𝕿𝕱-𝟠 */", getLLVMStyleWithColumns(12)));
 }
 
+#endif // _MSC_VER
+
 TEST_F(FormatTest, ConstructorInitializerIndentWidth) {
   FormatStyle Style = getLLVMStyle();
 
@@ -6185,8 +6237,6 @@ TEST_F(FormatTest, ConstructorInitializerIndentWidth) {
                Style);
 }
 
-#endif
-
 TEST_F(FormatTest, FormatsWithWebKitStyle) {
   FormatStyle Style = getWebKitStyle();