From fbfd6426e256f8be081260609d8fa88ae94ffe14 Mon Sep 17 00:00:00 2001 From: Justin Bogner Date: Mon, 21 Oct 2013 05:02:28 +0000 Subject: [PATCH] Lex: Don't restrict legal UCNs when preprocessing assembly The C and C++ standards disallow using universal character names to refer to some characters, such as basic ASCII and control characters, so we reject these sequences in the lexer. However, when the preprocessor isn't being used on C or C++, it doesn't make sense to apply these restrictions. Notably, accepting these characters avoids issues with Unicode escapes when GHC uses the compiler as a preprocessor on Haskell sources. Fixes rdar://problem/14742289 git-svn-id: https://llvm.org/svn/llvm-project/cfe/trunk@193067 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Lex/Lexer.cpp | 4 ++++ test/Preprocessor/assembler-with-cpp.c | 3 +++ 2 files changed, 7 insertions(+) diff --git a/lib/Lex/Lexer.cpp b/lib/Lex/Lexer.cpp index 3817afe75d..c071455da6 100644 --- a/lib/Lex/Lexer.cpp +++ b/lib/Lex/Lexer.cpp @@ -2730,6 +2730,10 @@ uint32_t Lexer::tryReadUCN(const char *&StartPtr, const char *SlashLoc, StartPtr = CurPtr; } + // Don't apply C family restrictions to UCNs in assembly mode + if (LangOpts.AsmPreprocessor) + return CodePoint; + // C99 6.4.3p2: A universal character name shall not specify a character whose // short identifier is less than 00A0 other than 0024 ($), 0040 (@), or // 0060 (`), nor one in the range D800 through DFFF inclusive.) diff --git a/test/Preprocessor/assembler-with-cpp.c b/test/Preprocessor/assembler-with-cpp.c index 65be564786..f03cb06ea1 100644 --- a/test/Preprocessor/assembler-with-cpp.c +++ b/test/Preprocessor/assembler-with-cpp.c @@ -72,6 +72,9 @@ 11: T11(b) // CHECK-Identifiers-True: 11: #0 +// Universal character names can specify basic ascii and control characters +12: \u0020\u0030\u0080\u0000 +// CHECK-Identifiers-False: 12: \u0020\u0030\u0080\u0000 // This should not crash // rdar://8823139 -- 2.50.1