From: Eli Friedman Date: Wed, 2 Nov 2011 23:06:23 +0000 (+0000) Subject: Use native endianness for writing out character escapes to the result buffer for... X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=caf1f26777c3adf2556c3af7bf9e01bd8ead17d9;p=clang Use native endianness for writing out character escapes to the result buffer for string literal parsing. No functional change on little-endian architectures; should fix test failures on PPC. git-svn-id: https://llvm.org/svn/llvm-project/cfe/trunk@143585 91177308-0d34-0410-b5e6-96231b3b80d8 --- diff --git a/lib/Lex/LiteralSupport.cpp b/lib/Lex/LiteralSupport.cpp index b107531e14..de10c3a824 100644 --- a/lib/Lex/LiteralSupport.cpp +++ b/lib/Lex/LiteralSupport.cpp @@ -253,31 +253,32 @@ static void EncodeUCNEscape(const char *&ThisTokBuf, const char *ThisTokEnd, assert((UcnLen== 4 || UcnLen== 8) && "only ucn length of 4 or 8 supported"); if (CharByteWidth == 4) { - // Note: our internal rep of wide char tokens is always little-endian. - *ResultBuf++ = (UcnVal & 0x000000FF); - *ResultBuf++ = (UcnVal & 0x0000FF00) >> 8; - *ResultBuf++ = (UcnVal & 0x00FF0000) >> 16; - *ResultBuf++ = (UcnVal & 0xFF000000) >> 24; + // FIXME: Make the type of the result buffer correct instead of + // using reinterpret_cast. + UTF32 *ResultPtr = reinterpret_cast(ResultBuf); + *ResultPtr = UcnVal; + ResultBuf += 4; return; } if (CharByteWidth == 2) { - // Convert to UTF16. + // FIXME: Make the type of the result buffer correct instead of + // using reinterpret_cast. + UTF16 *ResultPtr = reinterpret_cast(ResultBuf); + if (UcnVal < (UTF32)0xFFFF) { - *ResultBuf++ = (UcnVal & 0x000000FF); - *ResultBuf++ = (UcnVal & 0x0000FF00) >> 8; + *ResultPtr = UcnVal; + ResultBuf += 2; return; } + // FIXME: We shouldn't print a diagnostic for UTF-16 mode. if (Diags) Diags->Report(Loc, diag::warn_ucn_escape_too_large); - typedef uint16_t UTF16; + // Convert to UTF16. UcnVal -= 0x10000; - UTF16 surrogate1 = 0xD800 + (UcnVal >> 10); - UTF16 surrogate2 = 0xDC00 + (UcnVal & 0x3FF); - *ResultBuf++ = (surrogate1 & 0x000000FF); - *ResultBuf++ = (surrogate1 & 0x0000FF00) >> 8; - *ResultBuf++ = (surrogate2 & 0x000000FF); - *ResultBuf++ = (surrogate2 & 0x0000FF00) >> 8; + *ResultPtr = 0xD800 + (UcnVal >> 10); + *(ResultPtr+1) = 0xDC00 + (UcnVal & 0x3FF); + ResultBuf += 4; return; } @@ -1090,11 +1091,22 @@ void StringLiteralParser::init(const Token *StringToks, unsigned NumStringToks){ FullSourceLoc(StringToks[i].getLocation(), SM), CharByteWidth*8, Diags); - // Note: our internal rep of wide char tokens is always little-endian. - *ResultPtr++ = ResultChar & 0xFF; - - for (unsigned i = 1, e = CharByteWidth; i != e; ++i) - *ResultPtr++ = ResultChar >> i*8; + if (CharByteWidth == 4) { + // FIXME: Make the type of the result buffer correct instead of + // using reinterpret_cast. + UTF32 *ResultWidePtr = reinterpret_cast(ResultPtr); + *ResultWidePtr = ResultChar & 0xFF; + ResultPtr += 4; + } else if (CharByteWidth == 2) { + // FIXME: Make the type of the result buffer correct instead of + // using reinterpret_cast. + UTF16 *ResultWidePtr = reinterpret_cast(ResultPtr); + *ResultWidePtr = ResultChar & 0xFF; + ResultPtr += 2; + } else { + assert(CharByteWidth == 1 && "Unexpected char width"); + *ResultPtr++ = ResultChar & 0xFF; + } } } }