From: Richard Trieu
Date: Wed, 13 Jun 2012 20:25:24 +0000 (+0000)
Subject: Moved the StringLiteral printing code from StmtPrinter into the StringLiteral
X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=8ab09da1faaa33b9fa78de59cc4e191bfe9907b5;p=clang

Moved the StringLiteral printing code from StmtPrinter into the StringLiteral
class and have StmtPrinter and StmtDumper refer to it. This fixes an
assertion failure when dumping wchar string literals.

git-svn-id: https://llvm.org/svn/llvm-project/cfe/trunk@158417 91177308-0d34-0410-b5e6-96231b3b80d8
---

diff --git a/include/clang/AST/Expr.h b/include/clang/AST/Expr.h
index 40da0705f3..371263296f 100644
--- a/include/clang/AST/Expr.h
+++ b/include/clang/AST/Expr.h
@@ -1399,6 +1399,8 @@ public:
                      getByteLength());
   }
 
+  void outputString(raw_ostream &OS);
+
   uint32_t getCodeUnit(size_t i) const {
     assert(i < Length && "out of bounds access");
     if (CharByteWidth == 1)
diff --git a/lib/AST/Expr.cpp b/lib/AST/Expr.cpp
index d3f6a521f1..5bbf7503f8 100644
--- a/lib/AST/Expr.cpp
+++ b/lib/AST/Expr.cpp
@@ -633,6 +633,99 @@ StringLiteral *StringLiteral::CreateEmpty(ASTContext &C, unsigned NumStrs) {
   return SL;
 }
 
+void StringLiteral::outputString(raw_ostream &OS) {
+  switch (getKind()) {
+  case Ascii: break; // no prefix.
+  case Wide: OS << 'L'; break;
+  case UTF8: OS << "u8"; break;
+  case UTF16: OS << 'u'; break;
+  case UTF32: OS << 'U'; break;
+  }
+  OS << '"';
+  static const char Hex[] = "0123456789ABCDEF";
+
+  unsigned LastSlashX = getLength();
+  for (unsigned I = 0, N = getLength(); I != N; ++I) {
+    switch (uint32_t Char = getCodeUnit(I)) {
+    default:
+      // FIXME: Convert UTF-8 back to codepoints before rendering.
+
+      // Convert UTF-16 surrogate pairs back to codepoints before rendering.
+      // Leave invalid surrogates alone; we'll use \x for those.
+      if (getKind() == UTF16 && I != N - 1 && Char >= 0xd800 &&
+          Char <= 0xdbff) {
+        uint32_t Trail = getCodeUnit(I + 1);
+        if (Trail >= 0xdc00 && Trail <= 0xdfff) {
+          Char = 0x10000 + ((Char - 0xd800) << 10) + (Trail - 0xdc00);
+          ++I;
+        }
+      }
+
+      if (Char > 0xff) {
+        // If this is a wide string, output characters over 0xff using \x
+        // escapes. Otherwise, this is a UTF-16 or UTF-32 string, and Char is a
+        // codepoint: use \x escapes for invalid codepoints.
+        if (getKind() == Wide ||
+            (Char >= 0xd800 && Char <= 0xdfff) || Char >= 0x110000) {
+          // FIXME: Is this the best way to print wchar_t?
+          OS << "\\x";
+          int Shift = 28;
+          while ((Char >> Shift) == 0)
+            Shift -= 4;
+          for (/**/; Shift >= 0; Shift -= 4)
+            OS << Hex[(Char >> Shift) & 15];
+          LastSlashX = I;
+          break;
+        }
+
+        if (Char > 0xffff)
+          OS << "\\U00"
+             << Hex[(Char >> 20) & 15]
+             << Hex[(Char >> 16) & 15];
+        else
+          OS << "\\u";
+        OS << Hex[(Char >> 12) & 15]
+           << Hex[(Char >> 8) & 15]
+           << Hex[(Char >> 4) & 15]
+           << Hex[(Char >> 0) & 15];
+        break;
+      }
+
+      // If we used \x... for the previous character, and this character is a
+      // hexadecimal digit, prevent it being slurped as part of the \x.
+      if (LastSlashX + 1 == I) {
+        switch (Char) {
+          case '0': case '1': case '2': case '3': case '4':
+          case '5': case '6': case '7': case '8': case '9':
+          case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
+          case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
+            OS << "\"\"";
+        }
+      }
+
+      assert(Char <= 0xff &&
+             "Characters above 0xff should already have been handled.");
+
+      if (isprint(Char))
+        OS << (char)Char;
+      else // Output anything hard as an octal escape.
+        OS << '\\'
+           << (char)('0' + ((Char >> 6) & 7))
+           << (char)('0' + ((Char >> 3) & 7))
+           << (char)('0' + ((Char >> 0) & 7));
+      break;
+    // Handle some common non-printable cases to make dumps prettier.
+    case '\\': OS << "\\\\"; break;
+    case '"': OS << "\\\""; break;
+    case '\n': OS << "\\n"; break;
+    case '\t': OS << "\\t"; break;
+    case '\a': OS << "\\a"; break;
+    case '\b': OS << "\\b"; break;
+    }
+  }
+  OS << '"';
+}
+
 void StringLiteral::setString(ASTContext &C, StringRef Str,
                               StringKind Kind, bool IsPascal) {
   //FIXME: we assume that the string data comes from a target that uses the same
diff --git a/lib/AST/StmtDumper.cpp b/lib/AST/StmtDumper.cpp
index df0052760b..a57cce8371 100644
--- a/lib/AST/StmtDumper.cpp
+++ b/lib/AST/StmtDumper.cpp
@@ -446,18 +446,8 @@ void StmtDumper::VisitFloatingLiteral(FloatingLiteral *Node) {
 
 void StmtDumper::VisitStringLiteral(StringLiteral *Str) {
   DumpExpr(Str);
-  // FIXME: this doesn't print wstrings right.
   OS << " ";
-  switch (Str->getKind()) {
-  case StringLiteral::Ascii: break; // No prefix
-  case StringLiteral::Wide: OS << 'L'; break;
-  case StringLiteral::UTF8: OS << "u8"; break;
-  case StringLiteral::UTF16: OS << 'u'; break;
-  case StringLiteral::UTF32: OS << 'U'; break;
-  }
-  OS << '"';
-  OS.write_escaped(Str->getString());
-  OS << '"';
+  Str->outputString(OS);
 }
 
 void StmtDumper::VisitUnaryOperator(UnaryOperator *Node) {
diff --git a/lib/AST/StmtPrinter.cpp b/lib/AST/StmtPrinter.cpp
index 30548aea60..cb757cdde1 100644
--- a/lib/AST/StmtPrinter.cpp
+++ b/lib/AST/StmtPrinter.cpp
@@ -739,93 +739,7 @@ void StmtPrinter::VisitImaginaryLiteral(ImaginaryLiteral *Node) {
 }
 
 void StmtPrinter::VisitStringLiteral(StringLiteral *Str) {
-  switch (Str->getKind()) {
-  case StringLiteral::Ascii: break; // no prefix.
-  case StringLiteral::Wide: OS << 'L'; break;
-  case StringLiteral::UTF8: OS << "u8"; break;
-  case StringLiteral::UTF16: OS << 'u'; break;
-  case StringLiteral::UTF32: OS << 'U'; break;
-  }
-  OS << '"';
-  static const char Hex[] = "0123456789ABCDEF";
-
-  unsigned LastSlashX = Str->getLength();
-  for (unsigned I = 0, N = Str->getLength(); I != N; ++I) {
-    switch (uint32_t Char = Str->getCodeUnit(I)) {
-    default:
-      // FIXME: Convert UTF-8 back to codepoints before rendering.
-
-      // Convert UTF-16 surrogate pairs back to codepoints before rendering.
-      // Leave invalid surrogates alone; we'll use \x for those.
-      if (Str->getKind() == StringLiteral::UTF16 && I != N - 1 &&
-          Char >= 0xd800 && Char <= 0xdbff) {
-        uint32_t Trail = Str->getCodeUnit(I + 1);
-        if (Trail >= 0xdc00 && Trail <= 0xdfff) {
-          Char = 0x10000 + ((Char - 0xd800) << 10) + (Trail - 0xdc00);
-          ++I;
-        }
-      }
-
-      if (Char > 0xff) {
-        // If this is a wide string, output characters over 0xff using \x
-        // escapes. Otherwise, this is a UTF-16 or UTF-32 string, and Char is a
-        // codepoint: use \x escapes for invalid codepoints.
-        if (Str->getKind() == StringLiteral::Wide ||
-            (Char >= 0xd800 && Char <= 0xdfff) || Char >= 0x110000) {
-          // FIXME: Is this the best way to print wchar_t?
-          OS << "\\x";
-          int Shift = 28;
-          while ((Char >> Shift) == 0)
-            Shift -= 4;
-          for (/**/; Shift >= 0; Shift -= 4)
-            OS << Hex[(Char >> Shift) & 15];
-          LastSlashX = I;
-          break;
-        }
-
-        if (Char > 0xffff)
-          OS << "\\U00"
-             << Hex[(Char >> 20) & 15]
-             << Hex[(Char >> 16) & 15];
-        else
-          OS << "\\u";
-        OS << Hex[(Char >> 12) & 15]
-           << Hex[(Char >> 8) & 15]
-           << Hex[(Char >> 4) & 15]
-           << Hex[(Char >> 0) & 15];
-        break;
-      }
-
-      // If we used \x... for the previous character, and this character is a
-      // hexadecimal digit, prevent it being slurped as part of the \x.
-      if (LastSlashX + 1 == I) {
-        switch (Char) {
-          case '0': case '1': case '2': case '3': case '4':
-          case '5': case '6': case '7': case '8': case '9':
-          case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
-          case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
-            OS << "\"\"";
-        }
-      }
-
-      if (Char <= 0xff && isprint(Char))
-        OS << (char)Char;
-      else // Output anything hard as an octal escape.
-        OS << '\\'
-           << (char)('0' + ((Char >> 6) & 7))
-           << (char)('0' + ((Char >> 3) & 7))
-           << (char)('0' + ((Char >> 0) & 7));
-      break;
-    // Handle some common non-printable cases to make dumps prettier.
-    case '\\': OS << "\\\\"; break;
-    case '"': OS << "\\\""; break;
-    case '\n': OS << "\\n"; break;
-    case '\t': OS << "\\t"; break;
-    case '\a': OS << "\\a"; break;
-    case '\b': OS << "\\b"; break;
-    }
-  }
-  OS << '"';
+  Str->outputString(OS);
 }
 void StmtPrinter::VisitParenExpr(ParenExpr *Node) {
   OS << "(";
diff --git a/test/Misc/ast-dump-wchar.cpp b/test/Misc/ast-dump-wchar.cpp
new file mode 100644
index 0000000000..4153706bd6
--- /dev/null
+++ b/test/Misc/ast-dump-wchar.cpp
@@ -0,0 +1,13 @@
+// RUN: %clang_cc1 -std=c++11 -ast-dump %s 2>&1 | FileCheck %s
+
+char c8[] = u8"test\0\\\"\t\a\b\234";
+// CHECK: char c8[12] = (StringLiteral {{.*}} lvalue u8"test\000\\\"\t\a\b\234")
+
+char16_t c16[] = u"test\0\\\"\t\a\b\234\u1234";
+// CHECK: char16_t c16[13] = (StringLiteral {{.*}} lvalue u"test\000\\\"\t\a\b\234\u1234")
+
+char32_t c32[] = U"test\0\\\"\t\a\b\234\u1234\U0010ffff"; // \
+// CHECK: char32_t c32[14] = (StringLiteral {{.*}} lvalue U"test\000\\\"\t\a\b\234\u1234\U0010FFFF")
+
+wchar_t wc[] = L"test\0\\\"\t\a\b\234\u1234\xffffffff"; // \
+// CHECK: wchar_t wc[14] = (StringLiteral {{.*}} lvalue L"test\000\\\"\t\a\b\234\x1234\xFFFFFFFF")