From 26d41bffe2da8282ce4116b7251568380c8b2ffb Mon Sep 17 00:00:00 2001 From: Sanne Wouda Date: Fri, 7 Apr 2017 10:13:00 +0000 Subject: [PATCH] Skip Unicode character expansion in assembly files Summary: When using the C preprocessor with assembly files, either with a capital `S` file extension, or with `-xassembler-with-cpp`, the Unicode escape sequence `\u` is ignored. The `\u` pattern can be used for expanding a macro argument that starts with `u`. Author: Salman Arif Reviewers: rengolin, olista01 Reviewed By: olista01 Subscribers: cfe-commits Differential Revision: https://reviews.llvm.org/D31765 git-svn-id: https://llvm.org/svn/llvm-project/cfe/trunk@299754 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Lex/Lexer.cpp | 20 +++++++++++--------- test/Lexer/asm-preproc-no-unicode.s | 8 ++++++++ 2 files changed, 19 insertions(+), 9 deletions(-) create mode 100644 test/Lexer/asm-preproc-no-unicode.s diff --git a/lib/Lex/Lexer.cpp b/lib/Lex/Lexer.cpp index 6025a66751..4c05193947 100644 --- a/lib/Lex/Lexer.cpp +++ b/lib/Lex/Lexer.cpp @@ -3603,17 +3603,19 @@ LexNextToken: // UCNs (C99 6.4.3, C++11 [lex.charset]p2) case '\\': - if (uint32_t CodePoint = tryReadUCN(CurPtr, BufferPtr, &Result)) { - if (CheckUnicodeWhitespace(Result, CodePoint, CurPtr)) { - if (SkipWhitespace(Result, CurPtr, TokAtPhysicalStartOfLine)) - return true; // KeepWhitespaceMode + if (!LangOpts.AsmPreprocessor) { + if (uint32_t CodePoint = tryReadUCN(CurPtr, BufferPtr, &Result)) { + if (CheckUnicodeWhitespace(Result, CodePoint, CurPtr)) { + if (SkipWhitespace(Result, CurPtr, TokAtPhysicalStartOfLine)) + return true; // KeepWhitespaceMode + + // We only saw whitespace, so just try again with this lexer. + // (We manually eliminate the tail call to avoid recursion.) + goto LexNextToken; + } - // We only saw whitespace, so just try again with this lexer. - // (We manually eliminate the tail call to avoid recursion.) - goto LexNextToken; + return LexUnicode(Result, CodePoint, CurPtr); } - - return LexUnicode(Result, CodePoint, CurPtr); } Kind = tok::unknown; diff --git a/test/Lexer/asm-preproc-no-unicode.s b/test/Lexer/asm-preproc-no-unicode.s new file mode 100644 index 0000000000..d194a52fec --- /dev/null +++ b/test/Lexer/asm-preproc-no-unicode.s @@ -0,0 +1,8 @@ +// RUN: %clang -E -xassembler-with-cpp %s -o - 2>&1 | FileCheck %s + +// CHECK-NOT: warning: \u used with no following hex digits +// CHECK: .word \u + + .macro foo, u + .word \u + .endm -- 2.40.0