From 8ab09da1faaa33b9fa78de59cc4e191bfe9907b5 Mon Sep 17 00:00:00 2001
From: Richard Trieu
Date: Wed, 13 Jun 2012 20:25:24 +0000
Subject: [PATCH] Moved the StringLiteral printing code from StmtPrinter into
 the StringLiteral class and have StmtPrinter and StmtDumper refer to it.
 This fixes an assertion failure when dumping wchar string literals.

git-svn-id: https://llvm.org/svn/llvm-project/cfe/trunk@158417 91177308-0d34-0410-b5e6-96231b3b80d8
---
Reviewer note: this copy of the patch was recovered from a whitespace-collapsed
text dump. Line breaks were restored so that every hunk matches the line counts
in its hunk header; leading indentation and intra-line spacing are best-effort,
and the author e-mail was already absent from the dump. Apply with whitespace
tolerance, e.g. `git apply --ignore-whitespace` or `patch -l`.

 include/clang/AST/Expr.h     |  2 +
 lib/AST/Expr.cpp             | 93 ++++++++++++++++++++++++++++++++++++
 lib/AST/StmtDumper.cpp       | 12 +----
 lib/AST/StmtPrinter.cpp      | 88 +---------------------------------
 test/Misc/ast-dump-wchar.cpp | 13 +++++
 5 files changed, 110 insertions(+), 98 deletions(-)
 create mode 100644 test/Misc/ast-dump-wchar.cpp

diff --git a/include/clang/AST/Expr.h b/include/clang/AST/Expr.h
index 40da0705f3..371263296f 100644
--- a/include/clang/AST/Expr.h
+++ b/include/clang/AST/Expr.h
@@ -1399,6 +1399,8 @@ public:
                      getByteLength());
   }
 
+  void outputString(raw_ostream &OS);
+
   uint32_t getCodeUnit(size_t i) const {
     assert(i < Length && "out of bounds access");
     if (CharByteWidth == 1)
diff --git a/lib/AST/Expr.cpp b/lib/AST/Expr.cpp
index d3f6a521f1..5bbf7503f8 100644
--- a/lib/AST/Expr.cpp
+++ b/lib/AST/Expr.cpp
@@ -633,6 +633,99 @@ StringLiteral *StringLiteral::CreateEmpty(ASTContext &C, unsigned NumStrs) {
   return SL;
 }
 
+void StringLiteral::outputString(raw_ostream &OS) {
+  switch (getKind()) {
+  case Ascii: break; // no prefix.
+  case Wide: OS << 'L'; break;
+  case UTF8: OS << "u8"; break;
+  case UTF16: OS << 'u'; break;
+  case UTF32: OS << 'U'; break;
+  }
+  OS << '"';
+  static const char Hex[] = "0123456789ABCDEF";
+
+  unsigned LastSlashX = getLength();
+  for (unsigned I = 0, N = getLength(); I != N; ++I) {
+    switch (uint32_t Char = getCodeUnit(I)) {
+    default:
+      // FIXME: Convert UTF-8 back to codepoints before rendering.
+
+      // Convert UTF-16 surrogate pairs back to codepoints before rendering.
+      // Leave invalid surrogates alone; we'll use \x for those.
+      if (getKind() == UTF16 && I != N - 1 && Char >= 0xd800 &&
+          Char <= 0xdbff) {
+        uint32_t Trail = getCodeUnit(I + 1);
+        if (Trail >= 0xdc00 && Trail <= 0xdfff) {
+          Char = 0x10000 + ((Char - 0xd800) << 10) + (Trail - 0xdc00);
+          ++I;
+        }
+      }
+
+      if (Char > 0xff) {
+        // If this is a wide string, output characters over 0xff using \x
+        // escapes. Otherwise, this is a UTF-16 or UTF-32 string, and Char is a
+        // codepoint: use \x escapes for invalid codepoints.
+        if (getKind() == Wide ||
+            (Char >= 0xd800 && Char <= 0xdfff) || Char >= 0x110000) {
+          // FIXME: Is this the best way to print wchar_t?
+          OS << "\\x";
+          int Shift = 28;
+          while ((Char >> Shift) == 0)
+            Shift -= 4;
+          for (/**/; Shift >= 0; Shift -= 4)
+            OS << Hex[(Char >> Shift) & 15];
+          LastSlashX = I;
+          break;
+        }
+
+        if (Char > 0xffff)
+          OS << "\\U00"
+             << Hex[(Char >> 20) & 15]
+             << Hex[(Char >> 16) & 15];
+        else
+          OS << "\\u";
+        OS << Hex[(Char >> 12) & 15]
+           << Hex[(Char >> 8) & 15]
+           << Hex[(Char >> 4) & 15]
+           << Hex[(Char >> 0) & 15];
+        break;
+      }
+
+      // If we used \x... for the previous character, and this character is a
+      // hexadecimal digit, prevent it being slurped as part of the \x.
+      if (LastSlashX + 1 == I) {
+        switch (Char) {
+          case '0': case '1': case '2': case '3': case '4':
+          case '5': case '6': case '7': case '8': case '9':
+          case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
+          case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
+            OS << "\"\"";
+        }
+      }
+
+      assert(Char <= 0xff &&
+             "Characters above 0xff should already have been handled.");
+
+      if (isprint(Char))
+        OS << (char)Char;
+      else // Output anything hard as an octal escape.
+        OS << '\\'
+           << (char)('0' + ((Char >> 6) & 7))
+           << (char)('0' + ((Char >> 3) & 7))
+           << (char)('0' + ((Char >> 0) & 7));
+      break;
+    // Handle some common non-printable cases to make dumps prettier.
+    case '\\': OS << "\\\\"; break;
+    case '"': OS << "\\\""; break;
+    case '\n': OS << "\\n"; break;
+    case '\t': OS << "\\t"; break;
+    case '\a': OS << "\\a"; break;
+    case '\b': OS << "\\b"; break;
+    }
+  }
+  OS << '"';
+}
+
 void StringLiteral::setString(ASTContext &C, StringRef Str,
                               StringKind Kind, bool IsPascal) {
   //FIXME: we assume that the string data comes from a target that uses the same
diff --git a/lib/AST/StmtDumper.cpp b/lib/AST/StmtDumper.cpp
index df0052760b..a57cce8371 100644
--- a/lib/AST/StmtDumper.cpp
+++ b/lib/AST/StmtDumper.cpp
@@ -446,18 +446,8 @@ void StmtDumper::VisitFloatingLiteral(FloatingLiteral *Node) {
 
 void StmtDumper::VisitStringLiteral(StringLiteral *Str) {
   DumpExpr(Str);
-  // FIXME: this doesn't print wstrings right.
   OS << " ";
-  switch (Str->getKind()) {
-  case StringLiteral::Ascii: break; // No prefix
-  case StringLiteral::Wide: OS << 'L'; break;
-  case StringLiteral::UTF8: OS << "u8"; break;
-  case StringLiteral::UTF16: OS << 'u'; break;
-  case StringLiteral::UTF32: OS << 'U'; break;
-  }
-  OS << '"';
-  OS.write_escaped(Str->getString());
-  OS << '"';
+  Str->outputString(OS);
 }
 
 void StmtDumper::VisitUnaryOperator(UnaryOperator *Node) {
diff --git a/lib/AST/StmtPrinter.cpp b/lib/AST/StmtPrinter.cpp
index 30548aea60..cb757cdde1 100644
--- a/lib/AST/StmtPrinter.cpp
+++ b/lib/AST/StmtPrinter.cpp
@@ -739,93 +739,7 @@ void StmtPrinter::VisitImaginaryLiteral(ImaginaryLiteral *Node) {
 }
 
 void StmtPrinter::VisitStringLiteral(StringLiteral *Str) {
-  switch (Str->getKind()) {
-  case StringLiteral::Ascii: break; // no prefix.
-  case StringLiteral::Wide: OS << 'L'; break;
-  case StringLiteral::UTF8: OS << "u8"; break;
-  case StringLiteral::UTF16: OS << 'u'; break;
-  case StringLiteral::UTF32: OS << 'U'; break;
-  }
-  OS << '"';
-  static const char Hex[] = "0123456789ABCDEF";
-
-  unsigned LastSlashX = Str->getLength();
-  for (unsigned I = 0, N = Str->getLength(); I != N; ++I) {
-    switch (uint32_t Char = Str->getCodeUnit(I)) {
-    default:
-      // FIXME: Convert UTF-8 back to codepoints before rendering.
-
-      // Convert UTF-16 surrogate pairs back to codepoints before rendering.
-      // Leave invalid surrogates alone; we'll use \x for those.
-      if (Str->getKind() == StringLiteral::UTF16 && I != N - 1 &&
-          Char >= 0xd800 && Char <= 0xdbff) {
-        uint32_t Trail = Str->getCodeUnit(I + 1);
-        if (Trail >= 0xdc00 && Trail <= 0xdfff) {
-          Char = 0x10000 + ((Char - 0xd800) << 10) + (Trail - 0xdc00);
-          ++I;
-        }
-      }
-
-      if (Char > 0xff) {
-        // If this is a wide string, output characters over 0xff using \x
-        // escapes. Otherwise, this is a UTF-16 or UTF-32 string, and Char is a
-        // codepoint: use \x escapes for invalid codepoints.
-        if (Str->getKind() == StringLiteral::Wide ||
-            (Char >= 0xd800 && Char <= 0xdfff) || Char >= 0x110000) {
-          // FIXME: Is this the best way to print wchar_t?
-          OS << "\\x";
-          int Shift = 28;
-          while ((Char >> Shift) == 0)
-            Shift -= 4;
-          for (/**/; Shift >= 0; Shift -= 4)
-            OS << Hex[(Char >> Shift) & 15];
-          LastSlashX = I;
-          break;
-        }
-
-        if (Char > 0xffff)
-          OS << "\\U00"
-             << Hex[(Char >> 20) & 15]
-             << Hex[(Char >> 16) & 15];
-        else
-          OS << "\\u";
-        OS << Hex[(Char >> 12) & 15]
-           << Hex[(Char >> 8) & 15]
-           << Hex[(Char >> 4) & 15]
-           << Hex[(Char >> 0) & 15];
-        break;
-      }
-
-      // If we used \x... for the previous character, and this character is a
-      // hexadecimal digit, prevent it being slurped as part of the \x.
-      if (LastSlashX + 1 == I) {
-        switch (Char) {
-          case '0': case '1': case '2': case '3': case '4':
-          case '5': case '6': case '7': case '8': case '9':
-          case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
-          case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
-            OS << "\"\"";
-        }
-      }
-
-      if (Char <= 0xff && isprint(Char))
-        OS << (char)Char;
-      else // Output anything hard as an octal escape.
-        OS << '\\'
-           << (char)('0' + ((Char >> 6) & 7))
-           << (char)('0' + ((Char >> 3) & 7))
-           << (char)('0' + ((Char >> 0) & 7));
-      break;
-    // Handle some common non-printable cases to make dumps prettier.
-    case '\\': OS << "\\\\"; break;
-    case '"': OS << "\\\""; break;
-    case '\n': OS << "\\n"; break;
-    case '\t': OS << "\\t"; break;
-    case '\a': OS << "\\a"; break;
-    case '\b': OS << "\\b"; break;
-    }
-  }
-  OS << '"';
+  Str->outputString(OS);
 }
 void StmtPrinter::VisitParenExpr(ParenExpr *Node) {
   OS << "(";
diff --git a/test/Misc/ast-dump-wchar.cpp b/test/Misc/ast-dump-wchar.cpp
new file mode 100644
index 0000000000..4153706bd6
--- /dev/null
+++ b/test/Misc/ast-dump-wchar.cpp
@@ -0,0 +1,13 @@
+// RUN: %clang_cc1 -std=c++11 -ast-dump %s 2>&1 | FileCheck %s
+
+char c8[] = u8"test\0\\\"\t\a\b\234";
+// CHECK: char c8[12] = (StringLiteral {{.*}} lvalue u8"test\000\\\"\t\a\b\234")
+
+char16_t c16[] = u"test\0\\\"\t\a\b\234\u1234";
+// CHECK: char16_t c16[13] = (StringLiteral {{.*}} lvalue u"test\000\\\"\t\a\b\234\u1234")
+
+char32_t c32[] = U"test\0\\\"\t\a\b\234\u1234\U0010ffff"; // \
+// CHECK: char32_t c32[14] = (StringLiteral {{.*}} lvalue U"test\000\\\"\t\a\b\234\u1234\U0010FFFF")
+
+wchar_t wc[] = L"test\0\\\"\t\a\b\234\u1234\xffffffff"; // \
+// CHECK: wchar_t wc[14] = (StringLiteral {{.*}} lvalue L"test\000\\\"\t\a\b\234\x1234\xFFFFFFFF")
-- 
2.50.1