Correctly mark first token in the presence of UTF-8 BOM.

author Alexander Kornienko <alexfh@google.com>

Wed, 13 Nov 2013 14:04:17 +0000 (14:04 +0000)

committer Alexander Kornienko <alexfh@google.com>

Wed, 13 Nov 2013 14:04:17 +0000 (14:04 +0000)
author Alexander Kornienko <alexfh@google.com>
Wed, 13 Nov 2013 14:04:17 +0000 (14:04 +0000)
committer Alexander Kornienko <alexfh@google.com>
Wed, 13 Nov 2013 14:04:17 +0000 (14:04 +0000)
diff --git a/lib/Format/Format.cpp b/lib/Format/Format.cpp

index a5fc83e84e40f4cf93d27f6102d980c143364834..64ffe9e8f98f8adfa75768799ce753679d92a4cf 100644 (file)
--- a/lib/Format/Format.cpp
+++ b/lib/Format/Format.cpp
@@ -996,7 +996,7 @@ class FormatTokenLexer {
  public:
    FormatTokenLexer(Lexer &Lex, SourceManager &SourceMgr, FormatStyle &Style,
                     encoding::Encoding Encoding)
-      : FormatTok(NULL), GreaterStashed(false), Column(0),
+      : FormatTok(NULL), IsFirstToken(true), GreaterStashed(false), Column(0),
          TrailingWhitespace(0), Lex(Lex), SourceMgr(SourceMgr), Style(Style),
          IdentTable(getFormattingLangOpts()), Encoding(Encoding) {
      Lex.SetKeepWhitespaceMode(true);
@@ -1069,8 +1069,8 @@ private:
      readRawToken(*FormatTok);
      SourceLocation WhitespaceStart =
          FormatTok->Tok.getLocation().getLocWithOffset(-TrailingWhitespace);
-    if (SourceMgr.getFileOffset(WhitespaceStart) == 0)
-      FormatTok->IsFirst = true;
+    FormatTok->IsFirst = IsFirstToken;
+    IsFirstToken = false;
  
      // Consume and record whitespace until we find a significant token.
      unsigned WhitespaceLength = TrailingWhitespace;
@@ -1181,6 +1181,7 @@ private:
    }
  
    FormatToken *FormatTok;
+  bool IsFirstToken;
    bool GreaterStashed;
    unsigned Column;
    unsigned TrailingWhitespace;
diff --git a/unittests/Format/FormatTest.cpp b/unittests/Format/FormatTest.cpp

index f2ac3ba94fb52aa00a77ddcb188bbde0b24fc996..b6574c7503aaa6ad56d80467c519a7540e654b57 100644 (file)
--- a/unittests/Format/FormatTest.cpp
+++ b/unittests/Format/FormatTest.cpp
@@ -6911,6 +6911,14 @@ TEST_F(FormatTest, WorksFor8bitEncodings) {
                     getLLVMStyleWithColumns(12)));
  }
  
+TEST_F(FormatTest, HandlesUTF8BOM) {
+  EXPECT_EQ("\xef\xbb\xbf", format("\xef\xbb\xbf"));
+  EXPECT_EQ("\xef\xbb\xbf#include <iostream>",
+            format("\xef\xbb\xbf#include <iostream>"));
+  EXPECT_EQ("\xef\xbb\xbf\n#include <iostream>",
+            format("\xef\xbb\xbf\n#include <iostream>"));
+}
+
  // FIXME: Encode Cyrillic and CJK characters below to appease MS compilers.
  #if !defined(_MSC_VER)
author	Alexander Kornienko <alexfh@google.com>
	Wed, 13 Nov 2013 14:04:17 +0000 (14:04 +0000)
committer	Alexander Kornienko <alexfh@google.com>
	Wed, 13 Nov 2013 14:04:17 +0000 (14:04 +0000)
lib/Format/Format.cpp		patch | blob | history
unittests/Format/FormatTest.cpp		patch | blob | history