From cc90d37097d7e62dd9ee126b259acd43976a2b5a Mon Sep 17 00:00:00 2001
From: Alexandre Ganea
Date: Tue, 27 Aug 2019 00:13:52 +0000
Subject: [PATCH] [clang-scan-deps] Skip UTF-8 BOM in source minimizer

Differential Revision: https://reviews.llvm.org/D66511

git-svn-id: https://llvm.org/svn/llvm-project/cfe/trunk@369993 91177308-0d34-0410-b5e6-96231b3b80d8
---
 lib/Lex/DependencyDirectivesSourceMinimizer.cpp        |  7 +++++++
 .../minimize_source_to_dependency_directives_utf8bom.c | 10 ++++++++++
 2 files changed, 17 insertions(+)
 create mode 100644 test/Lexer/minimize_source_to_dependency_directives_utf8bom.c

diff --git a/lib/Lex/DependencyDirectivesSourceMinimizer.cpp b/lib/Lex/DependencyDirectivesSourceMinimizer.cpp
index 265a6e44e3..a350481df9 100644
--- a/lib/Lex/DependencyDirectivesSourceMinimizer.cpp
+++ b/lib/Lex/DependencyDirectivesSourceMinimizer.cpp
@@ -834,7 +834,14 @@ bool Minimizer::lexPPLine(const char *&First, const char *const End) {
   return lexDefault(Kind, Id.Name, First, End);
 }
 
+static void skipUTF8ByteOrderMark(const char *&First, const char *const End) {
+  if ((End - First) >= 3 && First[0] == '\xef' && First[1] == '\xbb' &&
+      First[2] == '\xbf')
+    First += 3;
+}
+
 bool Minimizer::minimizeImpl(const char *First, const char *const End) {
+  skipUTF8ByteOrderMark(First, End);
   while (First != End)
     if (lexPPLine(First, End))
       return true;
diff --git a/test/Lexer/minimize_source_to_dependency_directives_utf8bom.c b/test/Lexer/minimize_source_to_dependency_directives_utf8bom.c
new file mode 100644
index 0000000000..305442fbd2
--- /dev/null
+++ b/test/Lexer/minimize_source_to_dependency_directives_utf8bom.c
@@ -0,0 +1,10 @@
+// Test UTF8 BOM at start of file
+// RUN: printf '\xef\xbb\xbf' > %t.c
+// RUN: echo '#ifdef TEST\n' >> %t.c
+// RUN: echo '#include <string>' >> %t.c
+// RUN: echo '#endif' >> %t.c
+// RUN: %clang_cc1 -DTEST -print-dependency-directives-minimized-source %t.c 2>&1 | FileCheck %s
+
+// CHECK: #ifdef TEST
+// CHECK-NEXT: #include <string>
+// CHECK-NEXT: #endif
-- 2.40.0