granicus.if.org Git - clang/commitdiff
[clang-scan-deps] Skip UTF-8 BOM in source minimizer
author Alexandre Ganea <alexandre.ganea@ubisoft.com>
Tue, 27 Aug 2019 00:13:52 +0000 (00:13 +0000)
committer Alexandre Ganea <alexandre.ganea@ubisoft.com>
Tue, 27 Aug 2019 00:13:52 +0000 (00:13 +0000)
Differential Revision: https://reviews.llvm.org/D66511

git-svn-id: https://llvm.org/svn/llvm-project/cfe/trunk@369993 91177308-0d34-0410-b5e6-96231b3b80d8

lib/Lex/DependencyDirectivesSourceMinimizer.cpp
test/Lexer/minimize_source_to_dependency_directives_utf8bom.c [new file with mode: 0644]

index 265a6e44e33dd1b9d33ecd472352fe29d0b6fbb6..a350481df9aa90cedc6deb191191c2a6ba544ae4 100644 (file)
@@ -834,7 +834,14 @@ bool Minimizer::lexPPLine(const char *&First, const char *const End) {
   return lexDefault(Kind, Id.Name, First, End);
 }
 
+static void skipUTF8ByteOrderMark(const char *&First, const char *const End) { // Advances First past a leading UTF-8 byte order mark if [First, End) starts with one; otherwise leaves First untouched.
+  if ((End - First) >= 3 && First[0] == '\xef' && First[1] == '\xbb' && // Length is checked first so First[0..2] is never read past End.
+      First[2] == '\xbf') // The three bytes EF BB BF form the UTF-8 BOM this helper is named for.
+    First += 3; // First is a reference, so the caller's cursor now points just after the BOM.
+}
+
 bool Minimizer::minimizeImpl(const char *First, const char *const End) {
+  skipUTF8ByteOrderMark(First, End);
   while (First != End)
     if (lexPPLine(First, End))
       return true;
diff --git a/test/Lexer/minimize_source_to_dependency_directives_utf8bom.c b/test/Lexer/minimize_source_to_dependency_directives_utf8bom.c
new file mode 100644 (file)
index 0000000..305442f
--- /dev/null
@@ -0,0 +1,10 @@
+// Test UTF8 BOM at start of file\r
+// RUN: printf '\xef\xbb\xbf' > %t.c\r
+// RUN: echo '#ifdef TEST\n' >> %t.c\r
+// RUN: echo '#include <string>' >> %t.c\r
+// RUN: echo '#endif' >> %t.c\r
+// RUN: %clang_cc1 -DTEST -print-dependency-directives-minimized-source %t.c 2>&1 | FileCheck %s\r
+\r
+// CHECK:      #ifdef TEST\r
+// CHECK-NEXT: #include <string>\r
+// CHECK-NEXT: #endif\r