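Incorporate feedback from Eli: drop the result-buffer bounds check (and the ResultBufEnd parameter) from ProcessUCNEscape, add an IsWide parameter (wide strings are still a FIXME), reject UCN values above 0x10FFFF, shrink the firstByteMark table to the five entries UTF-8 needs, and add a compile-time sizeof check to the test.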

author Steve Naroff <snaroff@apple.com>

Tue, 31 Mar 2009 10:29:45 +0000 (10:29 +0000)

committer Steve Naroff <snaroff@apple.com>

Tue, 31 Mar 2009 10:29:45 +0000 (10:29 +0000)
author Steve Naroff <snaroff@apple.com>
Tue, 31 Mar 2009 10:29:45 +0000 (10:29 +0000)
committer Steve Naroff <snaroff@apple.com>
Tue, 31 Mar 2009 10:29:45 +0000 (10:29 +0000)
diff --git a/lib/Lex/LiteralSupport.cpp b/lib/Lex/LiteralSupport.cpp

index dcd239d5abd41235658536c90135656e3d50fb88..11bb1f25ad2dde5aca79020a2ee7cfedcaa1589d 100644 (file)
--- a/lib/Lex/LiteralSupport.cpp
+++ b/lib/Lex/LiteralSupport.cpp
@@ -154,10 +154,11 @@ static unsigned ProcessCharEscape(const char *&ThisTokBuf,
  /// When we decide to implement UCN's for character constants and identifiers,
  /// we will likely rework our support for UCN's.
  static void ProcessUCNEscape(const char *&ThisTokBuf, const char *ThisTokEnd, 
-                             char *&ResultBuf, const char *ResultBufEnd,
-                             bool &HadError, 
-                             SourceLocation Loc, Preprocessor &PP) {
+                             char *&ResultBuf, bool &HadError, 
+                             SourceLocation Loc, bool IsWide, Preprocessor &PP) 
+{
    // FIXME: Add a warning - UCN's are only valid in C++ & C99.
+  // FIXME: Handle wide strings.
    
    // Skip the '\u' char's.
    ThisTokBuf += 2;
@@ -183,10 +184,11 @@ static void ProcessUCNEscape(const char *&ThisTokBuf, const char *ThisTokEnd,
      HadError = 1;
      return;
    }
-  // Check UCN constraints (C99 6.4.3p2)
+  // Check UCN constraints (C99 6.4.3p2). 
    if ((UcnVal < 0xa0 &&
        (UcnVal != 0x24 && UcnVal != 0x40 && UcnVal != 0x60 )) // $, @, `
-      || (UcnVal >= 0xD800 && UcnVal <= 0xDFFF)) {
+      || (UcnVal >= 0xD800 && UcnVal <= 0xDFFF) 
+      || (UcnVal > 0x10FFFF)) /* the maximum legal UTF32 value */ {
      PP.Diag(Loc, diag::err_ucn_escape_invalid);
      HadError = 1;
      return;
@@ -207,20 +209,13 @@ static void ProcessUCNEscape(const char *&ThisTokBuf, const char *ThisTokEnd,
    else
      bytesToWrite = 4;
         
-  // If the buffer isn't big enough, bail.
-  if ((ResultBuf + bytesToWrite) >= ResultBufEnd) {
-    PP.Diag(Loc, diag::err_ucn_escape_too_big);
-    HadError = 1;
-    return;
-  }
    const unsigned byteMask = 0xBF;
    const unsigned byteMark = 0x80;
    
    // Once the bits are split out into bytes of UTF8, this is a mask OR-ed
-  // into the first byte, depending on how many bytes follow.  There are
-  // as many entries in this table as there are UTF8 sequence types.
-  static const UTF8 firstByteMark[7] = { 
-    0x00, 0x00, 0xC0, 0xE0, 0xF0, 0xF8, 0xFC 
+  // into the first byte, depending on how many bytes follow.
+  static const UTF8 firstByteMark[5] = { 
+    0x00, 0x00, 0xC0, 0xE0, 0xF0
    };
    // Finally, we write the bytes into ResultBuf.
    ResultBuf += bytesToWrite;
@@ -846,8 +841,7 @@ StringLiteralParser(const Token *StringToks, unsigned NumStringToks,
        
        if (ThisTokBuf[1] == 'u' || ThisTokBuf[1] == 'U') {
          ProcessUCNEscape(ThisTokBuf, ThisTokEnd, ResultPtr, 
-                         GetString() + ResultBuf.size(),
-                         hadError, StringToks[i].getLocation(), PP);
+                         hadError, StringToks[i].getLocation(), ThisIsWide, PP);
        } else {
          // Otherwise, this is a non-UCN escape character.  Process it.
          unsigned ResultChar = ProcessCharEscape(ThisTokBuf, ThisTokEnd, hadError,
diff --git a/test/Sema/ucn-cstring.c b/test/Sema/ucn-cstring.c

index ec760f4180e72ef56e4a7b4780ddaca752f3bdfe..6d021fd82d16079181e46eda0de987933dea25c0 100644 (file)
--- a/test/Sema/ucn-cstring.c
+++ b/test/Sema/ucn-cstring.c
@@ -3,6 +3,8 @@
  #include <stdio.h>
  
  int main(void) {
+  int a[sizeof("hello \u2192 \u2603 \u2190 world") == 24 ? 1 : -1];
+  
    printf("%s (%d)\n", "hello \u2192 \u2603 \u2190 world", sizeof("hello \u2192 \u2603 \u2190 world"));
    printf("%s (%d)\n", "\U00010400\U0001D12B", sizeof("\U00010400\U0001D12B"));
    // Some error conditions...
author	Steve Naroff <snaroff@apple.com>
	Tue, 31 Mar 2009 10:29:45 +0000 (10:29 +0000)
committer	Steve Naroff <snaroff@apple.com>
	Tue, 31 Mar 2009 10:29:45 +0000 (10:29 +0000)
lib/Lex/LiteralSupport.cpp		patch | blob | history
test/Sema/ucn-cstring.c		patch | blob | history