Use native endianness for writing out character escapes to the result buffer for...

author Eli Friedman <eli.friedman@gmail.com>

Wed, 2 Nov 2011 23:06:23 +0000 (23:06 +0000)

committer Eli Friedman <eli.friedman@gmail.com>

Wed, 2 Nov 2011 23:06:23 +0000 (23:06 +0000)
author Eli Friedman <eli.friedman@gmail.com>
Wed, 2 Nov 2011 23:06:23 +0000 (23:06 +0000)
committer Eli Friedman <eli.friedman@gmail.com>
Wed, 2 Nov 2011 23:06:23 +0000 (23:06 +0000)
diff --git a/lib/Lex/LiteralSupport.cpp b/lib/Lex/LiteralSupport.cpp

index b107531e144e4af28444c45ddb656036b03bf2b4..de10c3a824a3fc808e8e24878ba0d5b02dba7aed 100644 (file)
--- a/lib/Lex/LiteralSupport.cpp
+++ b/lib/Lex/LiteralSupport.cpp
@@ -253,31 +253,32 @@ static void EncodeUCNEscape(const char *&ThisTokBuf, const char *ThisTokEnd,
    assert((UcnLen== 4 || UcnLen== 8) && "only ucn length of 4 or 8 supported");
  
    if (CharByteWidth == 4) {
-    // Note: our internal rep of wide char tokens is always little-endian.
-    *ResultBuf++ = (UcnVal & 0x000000FF);
-    *ResultBuf++ = (UcnVal & 0x0000FF00) >> 8;
-    *ResultBuf++ = (UcnVal & 0x00FF0000) >> 16;
-    *ResultBuf++ = (UcnVal & 0xFF000000) >> 24;
+    // FIXME: Make the type of the result buffer correct instead of
+    // using reinterpret_cast.
+    UTF32 *ResultPtr = reinterpret_cast<UTF32*>(ResultBuf);
+    *ResultPtr = UcnVal;
+    ResultBuf += 4;
      return;
    }
  
    if (CharByteWidth == 2) {
-    // Convert to UTF16.
+    // FIXME: Make the type of the result buffer correct instead of
+    // using reinterpret_cast.
+    UTF16 *ResultPtr = reinterpret_cast<UTF16*>(ResultBuf);
+
      if (UcnVal < (UTF32)0xFFFF) {
-      *ResultBuf++ = (UcnVal & 0x000000FF);
-      *ResultBuf++ = (UcnVal & 0x0000FF00) >> 8;
+      *ResultPtr = UcnVal;
+      ResultBuf += 2;
        return;
      }
+    // FIXME: We shouldn't print a diagnostic for UTF-16 mode.
      if (Diags) Diags->Report(Loc, diag::warn_ucn_escape_too_large);
  
-    typedef uint16_t UTF16;
+    // Convert to UTF16.
      UcnVal -= 0x10000;
-    UTF16 surrogate1 = 0xD800 + (UcnVal >> 10);
-    UTF16 surrogate2 = 0xDC00 + (UcnVal & 0x3FF);
-    *ResultBuf++ = (surrogate1 & 0x000000FF);
-    *ResultBuf++ = (surrogate1 & 0x0000FF00) >> 8;
-    *ResultBuf++ = (surrogate2 & 0x000000FF);
-    *ResultBuf++ = (surrogate2 & 0x0000FF00) >> 8;
+    *ResultPtr     = 0xD800 + (UcnVal >> 10);
+    *(ResultPtr+1) = 0xDC00 + (UcnVal & 0x3FF);
+    ResultBuf += 4;
      return;
    }
  
@@ -1090,11 +1091,22 @@ void StringLiteralParser::init(const Token *StringToks, unsigned NumStringToks){
                              FullSourceLoc(StringToks[i].getLocation(), SM),
                              CharByteWidth*8, Diags);
  
-        // Note: our internal rep of wide char tokens is always little-endian.
-        *ResultPtr++ = ResultChar & 0xFF;
-
-        for (unsigned i = 1, e = CharByteWidth; i != e; ++i)
-          *ResultPtr++ = ResultChar >> i*8;
+        if (CharByteWidth == 4) {
+          // FIXME: Make the type of the result buffer correct instead of
+          // using reinterpret_cast.
+          UTF32 *ResultWidePtr = reinterpret_cast<UTF32*>(ResultPtr);
+          *ResultWidePtr = ResultChar & 0xFF;
+          ResultPtr += 4;
+        } else if (CharByteWidth == 2) {
+          // FIXME: Make the type of the result buffer correct instead of
+          // using reinterpret_cast.
+          UTF16 *ResultWidePtr = reinterpret_cast<UTF16*>(ResultPtr);
+          *ResultWidePtr = ResultChar & 0xFF;
+          ResultPtr += 2;
+        } else {
+          assert(CharByteWidth == 1 && "Unexpected char width");
+          *ResultPtr++ = ResultChar & 0xFF;
+        }
        }
      }
    }
author	Eli Friedman <eli.friedman@gmail.com>
	Wed, 2 Nov 2011 23:06:23 +0000 (23:06 +0000)
committer	Eli Friedman <eli.friedman@gmail.com>
	Wed, 2 Nov 2011 23:06:23 +0000 (23:06 +0000)