From: Chris Lattner Date: Thu, 17 Dec 2009 05:29:40 +0000 (+0000) Subject: reimplement r90860, fixing a couple of problems: X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=a2bf105739cef7aea8e4d219629c627fcd11742e;p=clang reimplement r90860, fixing a couple of problems: 1. Don't make a copy of LangOptions every time a lexer is created. 2. Don't make CharInfo global mutable state. 3. Fix the implementation to properly treat ^Z as EOF instead of as horizontal whitespace, which matches the semantics implemented by VC++. git-svn-id: https://llvm.org/svn/llvm-project/cfe/trunk@91586 91177308-0d34-0410-b5e6-96231b3b80d8 --- diff --git a/lib/Lex/Lexer.cpp b/lib/Lex/Lexer.cpp index a91e40435c..d5a46433c3 100644 --- a/lib/Lex/Lexer.cpp +++ b/lib/Lex/Lexer.cpp @@ -33,7 +33,7 @@ #include using namespace clang; -static void InitCharacterInfo(LangOptions); +static void InitCharacterInfo(); //===----------------------------------------------------------------------===// // Token Class Implementation @@ -59,7 +59,7 @@ tok::ObjCKeywordKind Token::getObjCKeywordID() const { void Lexer::InitLexer(const char *BufStart, const char *BufPtr, const char *BufEnd) { - InitCharacterInfo(Features); + InitCharacterInfo(); BufferStart = BufStart; BufferPtr = BufPtr; @@ -254,7 +254,7 @@ enum { // Statically initialize CharInfo table based on ASCII character set // Reference: FreeBSD 7.2 /usr/share/misc/ascii -static unsigned char CharInfo[256] = +static const unsigned char CharInfo[256] = { // 0 NUL 1 SOH 2 STX 3 ETX // 4 EOT 5 ENQ 6 ACK 7 BEL @@ -322,7 +322,7 @@ static unsigned char CharInfo[256] = 0 , 0 , 0 , 0 }; -static void InitCharacterInfo(LangOptions Features) { +static void InitCharacterInfo() { static bool isInited = false; if (isInited) return; // check the statically-initialized CharInfo table @@ -341,10 +341,6 @@ static void InitCharacterInfo(LangOptions Features) { for (unsigned i = '0'; i <= '9'; ++i) assert(CHAR_NUMBER == CharInfo[i]); - if (Features.Microsoft) 
- // Hack to treat DOS & CP/M EOF (^Z) as horizontal whitespace. - CharInfo[26/*sub*/] = CHAR_HORZ_WS; - isInited = true; } @@ -1549,6 +1545,22 @@ LexNextToken: return; // KeepWhitespaceMode goto LexNextToken; // GCC isn't tail call eliminating. + + case 26: // DOS & CP/M EOF: "^Z". + // If we're in Microsoft extensions mode, treat this as end of file. + if (Features.Microsoft) { + // Read the PP instance variable into an automatic variable, because + // LexEndOfFile will often delete 'this'. + Preprocessor *PPCache = PP; + if (LexEndOfFile(Result, CurPtr-1)) // Retreat back into the file. + return; // Got a token to return. + assert(PPCache && "Raw buffer::LexEndOfFile should return a token"); + return PPCache->Lex(Result); + } + // If Microsoft extensions are disabled, this is just random garbage. + Kind = tok::unknown; + break; + case '\n': case '\r': // If we are inside a preprocessor directive and we see the end of line, @@ -1599,7 +1611,7 @@ LexNextToken: goto SkipHorizontalWhitespace; } goto LexNextToken; // GCC isn't tail call eliminating. - + // C99 6.4.4.1: Integer Constants. // C99 6.4.4.2: Floating Constants. case '0': case '1': case '2': case '3': case '4': diff --git a/test/Lexer/msdos-cpm-eof.c b/test/Lexer/msdos-cpm-eof.c index 0a6956d6b3..9ef6e32ea0 100644 --- a/test/Lexer/msdos-cpm-eof.c +++ b/test/Lexer/msdos-cpm-eof.c @@ -1,5 +1,7 @@ // RUN: %clang_cc1 -fsyntax-only -verify -fms-extensions %s -int a; +int x;  + +I am random garbage after ^Z