From: Alexander Kornienko <alexfh@google.com>
Date: Wed, 13 Nov 2013 14:04:17 +0000 (+0000)
Subject: Correctly mark first token in the presence of UTF-8 BOM.
X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=a9f280942e3129a3d9e051bcf1bb5616243f212c;p=clang

Correctly mark first token in the presence of UTF-8 BOM.

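Summary: Fixes http://llvm.org/PR17753. The first token was previously
identified by checking that its leading whitespace started at file offset 0;
FormatTokenLexer now tracks it with an explicit IsFirstToken flag instead, so
the first token is still marked when the file begins with a UTF-8 BOM.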

Reviewers: klimek

Reviewed By: klimek

CC: cfe-commits, klimek

Differential Revision: http://llvm-reviews.chandlerc.com/D2159

git-svn-id: https://llvm.org/svn/llvm-project/cfe/trunk@194576 91177308-0d34-0410-b5e6-96231b3b80d8
---

diff --git a/lib/Format/Format.cpp b/lib/Format/Format.cpp
index a5fc83e84e..64ffe9e8f9 100644
--- a/lib/Format/Format.cpp
+++ b/lib/Format/Format.cpp
@@ -996,7 +996,7 @@ class FormatTokenLexer {
 public:
   FormatTokenLexer(Lexer &Lex, SourceManager &SourceMgr, FormatStyle &Style,
                    encoding::Encoding Encoding)
-      : FormatTok(NULL), GreaterStashed(false), Column(0),
+      : FormatTok(NULL), IsFirstToken(true), GreaterStashed(false), Column(0),
         TrailingWhitespace(0), Lex(Lex), SourceMgr(SourceMgr), Style(Style),
         IdentTable(getFormattingLangOpts()), Encoding(Encoding) {
     Lex.SetKeepWhitespaceMode(true);
@@ -1069,8 +1069,8 @@ private:
     readRawToken(*FormatTok);
     SourceLocation WhitespaceStart =
         FormatTok->Tok.getLocation().getLocWithOffset(-TrailingWhitespace);
-    if (SourceMgr.getFileOffset(WhitespaceStart) == 0)
-      FormatTok->IsFirst = true;
+    FormatTok->IsFirst = IsFirstToken;
+    IsFirstToken = false;
 
     // Consume and record whitespace until we find a significant token.
     unsigned WhitespaceLength = TrailingWhitespace;
@@ -1181,6 +1181,7 @@ private:
   }
 
   FormatToken *FormatTok;
+  bool IsFirstToken;
   bool GreaterStashed;
   unsigned Column;
   unsigned TrailingWhitespace;
diff --git a/unittests/Format/FormatTest.cpp b/unittests/Format/FormatTest.cpp
index f2ac3ba94f..b6574c7503 100644
--- a/unittests/Format/FormatTest.cpp
+++ b/unittests/Format/FormatTest.cpp
@@ -6911,6 +6911,14 @@ TEST_F(FormatTest, WorksFor8bitEncodings) {
                    getLLVMStyleWithColumns(12)));
 }
 
+TEST_F(FormatTest, HandlesUTF8BOM) {
+  EXPECT_EQ("\xef\xbb\xbf", format("\xef\xbb\xbf"));
+  EXPECT_EQ("\xef\xbb\xbf#include <iostream>",
+            format("\xef\xbb\xbf#include <iostream>"));
+  EXPECT_EQ("\xef\xbb\xbf\n#include <iostream>",
+            format("\xef\xbb\xbf\n#include <iostream>"));
+}
+
 // FIXME: Encode Cyrillic and CJK characters below to appease MS compilers.
 #if !defined(_MSC_VER)