Fix assertions and wrong output from StmtPrinter's string literal printing.

author Richard Smith <richard-llvm@metafoo.co.uk>

Thu, 5 Apr 2012 00:17:44 +0000 (00:17 +0000)

committer Richard Smith <richard-llvm@metafoo.co.uk>

Thu, 5 Apr 2012 00:17:44 +0000 (00:17 +0000)
author Richard Smith <richard-llvm@metafoo.co.uk>
Thu, 5 Apr 2012 00:17:44 +0000 (00:17 +0000)
committer Richard Smith <richard-llvm@metafoo.co.uk>
Thu, 5 Apr 2012 00:17:44 +0000 (00:17 +0000)
diff --git a/lib/AST/StmtPrinter.cpp b/lib/AST/StmtPrinter.cpp

index 7ebc1299f07a928fd90744b16b62410ffeff26c6..ef5eefb306cdfc12cdc5c93ccbd2c33e9b5c74b1 100644 (file)
--- a/lib/AST/StmtPrinter.cpp
+++ b/lib/AST/StmtPrinter.cpp
@@ -727,12 +727,40 @@ void StmtPrinter::VisitStringLiteral(StringLiteral *Str) {
    OS << '"';
    static char Hex[] = "0123456789ABCDEF";
  
+  unsigned LastSlashX = Str->getLength();
    for (unsigned I = 0, N = Str->getLength(); I != N; ++I) {
      switch (uint32_t Char = Str->getCodeUnit(I)) {
      default:
-      // FIXME: Is this the best way to print wchar_t?
+      // FIXME: Convert UTF-8 back to codepoints before rendering.
+
+      // Convert UTF-16 surrogate pairs back to codepoints before rendering.
+      // Leave invalid surrogates alone; we'll use \x for those.
+      if (Str->getKind() == StringLiteral::UTF16 && I != N - 1 &&
+          Char >= 0xd800 && Char <= 0xdbff) {
+        uint32_t Trail = Str->getCodeUnit(I + 1);
+        if (Trail >= 0xdc00 && Trail <= 0xdfff) {
+          Char = 0x10000 + ((Char - 0xd800) << 10) + (Trail - 0xdc00);
+          ++I;
+        }
+      }
+
        if (Char > 0xff) {
-        assert(Char <= 0x10ffff && "invalid unicode codepoint");
+        // If this is a wide string, output characters over 0xff using \x
+        // escapes. Otherwise, this is a UTF-16 or UTF-32 string, and Char is a
+        // codepoint: use \x escapes for invalid codepoints.
+        if (Str->getKind() == StringLiteral::Wide ||
+            (Char >= 0xd800 && Char <= 0xdfff) || Char >= 0x110000) {
+          // FIXME: Is this the best way to print wchar_t?
+          OS << "\\x";
+          int Shift = 28;
+          while ((Char >> Shift) == 0)
+            Shift -= 4;
+          for (/**/; Shift >= 0; Shift -= 4)
+            OS << Hex[(Char >> Shift) & 15];
+          LastSlashX = I;
+          break;
+        }
+
          if (Char > 0xffff)
            OS << "\\U00"
               << Hex[(Char >> 20) & 15]
@@ -745,13 +773,26 @@ void StmtPrinter::VisitStringLiteral(StringLiteral *Str) {
             << Hex[(Char >>  0) & 15];
          break;
        }
+
+      // If we used \x... for the previous character, and this character is a
+      // hexadecimal digit, prevent it being slurped as part of the \x.
+      if (LastSlashX + 1 == I) {
+        switch (Char) {
+          case '0': case '1': case '2': case '3': case '4':
+          case '5': case '6': case '7': case '8': case '9':
+          case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
+          case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
+            OS << "\"\"";
+        }
+      }
+
        if (Char <= 0xff && isprint(Char))
          OS << (char)Char;
        else  // Output anything hard as an octal escape.
          OS << '\\'
-        << (char)('0'+ ((Char >> 6) & 7))
-        << (char)('0'+ ((Char >> 3) & 7))
-        << (char)('0'+ ((Char >> 0) & 7));
+           << (char)('0' + ((Char >> 6) & 7))
+           << (char)('0' + ((Char >> 3) & 7))
+           << (char)('0' + ((Char >> 0) & 7));
        break;
      // Handle some common non-printable cases to make dumps prettier.
      case '\\': OS << "\\\\"; break;
diff --git a/test/SemaCXX/constexpr-printing.cpp b/test/SemaCXX/constexpr-printing.cpp

index 4e5bc429dbdd5b1f2a5a6a3c5d95809c38a28cc1..fc0cce25eb85013ff5c83bdee440a089b7f669b1 100644 (file)
--- a/test/SemaCXX/constexpr-printing.cpp
+++ b/test/SemaCXX/constexpr-printing.cpp
@@ -85,8 +85,8 @@ constexpr char16_t c16 = get(u"test\0\\\"\t\a\b\234\u1234"); // \
    expected-error {{}} expected-note {{u"test\000\\\"\t\a\b\234\u1234"}}
  constexpr char32_t c32 = get(U"test\0\\\"\t\a\b\234\u1234\U0010ffff"); // \
    expected-error {{}} expected-note {{U"test\000\\\"\t\a\b\234\u1234\U0010FFFF"}}
-constexpr wchar_t wc = get(L"test\0\\\"\t\a\b\234\u1234"); // \
-  expected-error {{}} expected-note {{L"test\000\\\"\t\a\b\234\u1234"}}
+constexpr wchar_t wc = get(L"test\0\\\"\t\a\b\234\u1234\xffffffff"); // \
+  expected-error {{}} expected-note {{L"test\000\\\"\t\a\b\234\x1234\xFFFFFFFF"}}
  
  constexpr char32_t c32_err = get(U"\U00110000"); // expected-error {{invalid universal character}}
  
diff --git a/test/SemaCXX/static-assert.cpp b/test/SemaCXX/static-assert.cpp

index 2b44e81d3e6c4793e03bdccf6080c6da4dab0a7c..68ef0183e252948d7a559b54255d3fc58c53bd6e 100644 (file)
--- a/test/SemaCXX/static-assert.cpp
+++ b/test/SemaCXX/static-assert.cpp
@@ -27,3 +27,10 @@ template<typename T> struct S {
  
  S<char> s1; // expected-note {{in instantiation of template class 'S<char>' requested here}}
  S<int> s2;
+
+static_assert(false, L"\xFFFFFFFF"); // expected-error {{static_assert failed L"\xFFFFFFFF"}}
+static_assert(false, u"\U000317FF"); // expected-error {{static_assert failed u"\U000317FF"}}
+// FIXME: render this as u8"\u03A9"
+static_assert(false, u8"Ω"); // expected-error {{static_assert failed u8"\316\251"}}
+static_assert(false, L"\u1234"); // expected-error {{static_assert failed L"\x1234"}}
+static_assert(false, L"\x1ff" "0\x123" "fx\xfffff" "goop"); // expected-error {{static_assert failed L"\x1FF""0\x123""fx\xFFFFFgoop"}}
author	Richard Smith <richard-llvm@metafoo.co.uk>
	Thu, 5 Apr 2012 00:17:44 +0000 (00:17 +0000)
committer	Richard Smith <richard-llvm@metafoo.co.uk>
	Thu, 5 Apr 2012 00:17:44 +0000 (00:17 +0000)
lib/AST/StmtPrinter.cpp		patch | blob | history
test/SemaCXX/constexpr-printing.cpp		patch | blob | history
test/SemaCXX/static-assert.cpp		patch | blob | history