Fixes various problems with accounting for tabs in the original (unformatted) source code when computing a token's column.

author Manuel Klimek <klimek@google.com>

Thu, 29 Aug 2013 15:21:40 +0000 (15:21 +0000)

committer Manuel Klimek <klimek@google.com>

Thu, 29 Aug 2013 15:21:40 +0000 (15:21 +0000)
author Manuel Klimek <klimek@google.com>
Thu, 29 Aug 2013 15:21:40 +0000 (15:21 +0000)
committer Manuel Klimek <klimek@google.com>
Thu, 29 Aug 2013 15:21:40 +0000 (15:21 +0000)
diff --git a/lib/Format/Format.cpp b/lib/Format/Format.cpp

index 3982ba6a77180c9209e9e094870642b0b4dd854e..3c740d9d84ca9d95355fd434a5e2850c5f11c62c 100644 (file)
--- a/lib/Format/Format.cpp
+++ b/lib/Format/Format.cpp
@@ -487,11 +487,11 @@ private:
  
  class FormatTokenLexer {
  public:
-  FormatTokenLexer(Lexer &Lex, SourceManager &SourceMgr,
+  FormatTokenLexer(Lexer &Lex, SourceManager &SourceMgr, FormatStyle &Style,
                     encoding::Encoding Encoding)
-      : FormatTok(NULL), GreaterStashed(false), TrailingWhitespace(0), Lex(Lex),
-        SourceMgr(SourceMgr), IdentTable(getFormattingLangOpts()),
-        Encoding(Encoding) {
+      : FormatTok(NULL), GreaterStashed(false), Column(0),
+        TrailingWhitespace(0), Lex(Lex), SourceMgr(SourceMgr), Style(Style),
+        IdentTable(getFormattingLangOpts()), Encoding(Encoding) {
      Lex.SetKeepWhitespaceMode(true);
    }
  
@@ -509,6 +509,7 @@ private:
    FormatToken *getNextToken() {
      if (GreaterStashed) {
        // Create a synthesized second '>' token.
+      // FIXME: Increment Column and set OriginalColumn.
        Token Greater = FormatTok->Tok;
        FormatTok = new (Allocator.Allocate()) FormatToken;
        FormatTok->Tok = Greater;
@@ -532,13 +533,29 @@ private:
      // Consume and record whitespace until we find a significant token.
      unsigned WhitespaceLength = TrailingWhitespace;
      while (FormatTok->Tok.is(tok::unknown)) {
-      unsigned Newlines = FormatTok->TokenText.count('\n');
-      if (Newlines > 0)
-        FormatTok->LastNewlineOffset =
-            WhitespaceLength + FormatTok->TokenText.rfind('\n') + 1;
-      FormatTok->NewlinesBefore += Newlines;
-      unsigned EscapedNewlines = FormatTok->TokenText.count("\\\n");
-      FormatTok->HasUnescapedNewline |= EscapedNewlines != Newlines;
+      for (int i = 0, e = FormatTok->TokenText.size(); i != e; ++i) {
+        switch (FormatTok->TokenText[i]) {
+        case '\n':
+          ++FormatTok->NewlinesBefore;
+          // FIXME: This is technically incorrect, as it could also
+          // be a literal backslash at the end of the line.
+          if (i == 0 || FormatTok->TokenText[i-1] != '\\')
+            FormatTok->HasUnescapedNewline = true;
+          FormatTok->LastNewlineOffset = WhitespaceLength + i + 1;
+          Column = 0;
+          break;
+        case ' ':
+          ++Column;
+          break;
+        case '\t':
+          Column += Style.IndentWidth - Column % Style.IndentWidth;
+          break;
+        default:
+          ++Column;
+          break;
+        }
+      }
+
        WhitespaceLength += FormatTok->Tok.getLength();
  
        readRawToken(*FormatTok);
@@ -554,11 +571,14 @@ private:
             FormatTok->TokenText[1] == '\n') {
        // FIXME: ++FormatTok->NewlinesBefore is missing...
        WhitespaceLength += 2;
+      Column = 0;
        FormatTok->TokenText = FormatTok->TokenText.substr(2);
      }
+    FormatTok->OriginalColumn = Column;
  
      TrailingWhitespace = 0;
      if (FormatTok->Tok.is(tok::comment)) {
+      // FIXME: Add the trimmed whitespace to Column.
        StringRef UntrimmedText = FormatTok->TokenText;
        FormatTok->TokenText = FormatTok->TokenText.rtrim();
        TrailingWhitespace = UntrimmedText.size() - FormatTok->TokenText.size();
@@ -576,6 +596,8 @@ private:
      FormatTok->CodePointCount =
          encoding::getCodePointCount(FormatTok->TokenText, Encoding);
  
+    // FIXME: Add the CodePointCount to Column.
+
      FormatTok->WhitespaceRange = SourceRange(
          WhitespaceStart, WhitespaceStart.getLocWithOffset(WhitespaceLength));
      return FormatTok;
@@ -583,9 +605,11 @@ private:
  
    FormatToken *FormatTok;
    bool GreaterStashed;
+  unsigned Column;
    unsigned TrailingWhitespace;
    Lexer &Lex;
    SourceManager &SourceMgr;
+  FormatStyle &Style;
    IdentifierTable IdentTable;
    encoding::Encoding Encoding;
    llvm::SpecificBumpPtrAllocator<FormatToken> Allocator;
@@ -595,7 +619,6 @@ private:
      Lex.LexFromRawLexer(Tok.Tok);
      Tok.TokenText = StringRef(SourceMgr.getCharacterData(Tok.Tok.getLocation()),
                                Tok.Tok.getLength());
-
      // For formatting, treat unterminated string literals like normal string
      // literals.
      if (Tok.is(tok::unknown) && !Tok.TokenText.empty() &&
@@ -622,7 +645,7 @@ public:
    virtual ~Formatter() {}
  
    tooling::Replacements format() {
-    FormatTokenLexer Tokens(Lex, SourceMgr, Encoding);
+    FormatTokenLexer Tokens(Lex, SourceMgr, Style, Encoding);
  
      UnwrappedLineParser Parser(Style, Tokens.lex(), *this);
      bool StructuralError = Parser.parse();
@@ -692,9 +715,7 @@ public:
            formatFirstToken(*TheLine.First, PreviousLineLastToken, Indent,
                             TheLine.InPPDirective);
          } else {
-          Indent = LevelIndent =
-              SourceMgr.getSpellingColumnNumber(FirstTok->Tok.getLocation()) -
-              1;
+          Indent = LevelIndent = FirstTok->OriginalColumn;
          }
          ContinuationIndenter Indenter(Style, SourceMgr, TheLine, Indent,
                                        Whitespaces, Encoding,
@@ -727,8 +748,7 @@ public:
               Tok = Tok->Next) {
            if (Tok == TheLine.First &&
                (Tok->NewlinesBefore > 0 || Tok->IsFirst)) {
-            unsigned LevelIndent =
-                SourceMgr.getSpellingColumnNumber(Tok->Tok.getLocation()) - 1;
+            unsigned LevelIndent = Tok->OriginalColumn;
              // Remove trailing whitespace of the previous line if it was
              // touched.
              if (PreviousLineWasTouched || touchesEmptyLineBefore(TheLine)) {
diff --git a/lib/Format/FormatToken.h b/lib/Format/FormatToken.h

index 62b0ba6adf2a967e6cf6ab8acbdccdb76d9d4209..950938395e5d93862eb874e1f3f79b9f6490b3bf 100644 (file)
--- a/lib/Format/FormatToken.h
+++ b/lib/Format/FormatToken.h
@@ -169,9 +169,18 @@ struct FormatToken {
    /// \brief If this is an opening parenthesis, how are the parameters packed?
    ParameterPackingKind PackingKind;
  
-  /// \brief The total length of the line up to and including this token.
+  /// \brief The total length of the unwrapped line up to and including this
+  /// token.
    unsigned TotalLength;
  
+  /// \brief The original column of this token, including expanded tabs.
+  /// The configured IndentWidth is used as tab width. Only tabs in whitespace
+  /// are expanded.
+  /// FIXME: This is currently only used on the first token of an unwrapped
+  /// line, and the implementation is not correct for other tokens (see the
+  /// FIXMEs in FormatTokenLexer::getNextToken()).
+  unsigned OriginalColumn;
+
    /// \brief The length of following tokens until the next natural split point,
    /// or the next token that can be broken.
    unsigned UnbreakableTailLength;
diff --git a/unittests/Format/FormatTest.cpp b/unittests/Format/FormatTest.cpp

index 122b367a223c3364c3d9d719a66cf9dc348d21d0..b20007d1e08930f596a82375ce29e1cc68cc317a 100644 (file)
--- a/unittests/Format/FormatTest.cpp
+++ b/unittests/Format/FormatTest.cpp
@@ -5528,7 +5528,33 @@ TEST_F(FormatTest, ConfigurableUseOfTab) {
                 "\t\t    parameter2); \\\n"
                 "\t}",
                 Tab);
-
+  EXPECT_EQ("void f() {\n"
+            "\tf();\n"
+            "\tg();\n"
+            "}",
+            format("void f() {\n"
+                   "\tf();\n"
+                   "\tg();\n"
+                   "}",
+                   0, 0, Tab));
+  EXPECT_EQ("void f() {\n"
+            "\tf();\n"
+            "\tg();\n"
+            "}",
+            format("void f() {\n"
+                   "\tf();\n"
+                   "\tg();\n"
+                   "}",
+                   16, 0, Tab));
+  EXPECT_EQ("void f() {\n"
+            "  \tf();\n"
+            "\tg();\n"
+            "}",
+            format("void f() {\n"
+                   "  \tf();\n"
+                   "  \tg();\n"
+                   "}",
+                   21, 0, Tab));
  
    // FIXME: To correctly count mixed whitespace we need to
    // also correctly count mixed whitespace in front of the comment.
author	Manuel Klimek <klimek@google.com>
	Thu, 29 Aug 2013 15:21:40 +0000 (15:21 +0000)
committer	Manuel Klimek <klimek@google.com>
	Thu, 29 Aug 2013 15:21:40 +0000 (15:21 +0000)
lib/Format/Format.cpp		patch | blob | history
lib/Format/FormatToken.h		patch | blob | history
unittests/Format/FormatTest.cpp		patch | blob | history