granicus.if.org Git - clang/commitdiff
Properly track that a character literal is UTF-8, and pretty print the prefix properly.
author: Aaron Ballman <aaron@aaronballman.com>
Thu, 7 Jan 2016 20:59:26 +0000 (20:59 +0000)
committer: Aaron Ballman <aaron@aaronballman.com>
Thu, 7 Jan 2016 20:59:26 +0000 (20:59 +0000)
git-svn-id: https://llvm.org/svn/llvm-project/cfe/trunk@257097 91177308-0d34-0410-b5e6-96231b3b80d8

include/clang/AST/Expr.h
include/clang/AST/Stmt.h
include/clang/Lex/LiteralSupport.h
lib/AST/StmtPrinter.cpp
lib/Lex/LiteralSupport.cpp
lib/Sema/SemaExpr.cpp
lib/Sema/SemaExprObjC.cpp
lib/Sema/SemaTemplate.cpp
lib/Serialization/ASTWriterDecl.cpp
test/Misc/ast-print-char-literal.cpp [new file with mode: 0644]
test/PCH/cxx-char-literal.cpp [new file with mode: 0644]

diff --git a/include/clang/AST/Expr.h b/include/clang/AST/Expr.h
index 095dd6a1ef348f4a7f6c06b9ee9eb4ced9861ab9..38733eee82c361fe2fac4f8f44df20747e6cf430 100644 (file)
@@ -1292,6 +1292,7 @@ public:
   enum CharacterKind {
     Ascii,
     Wide,
+    UTF8,
     UTF16,
     UTF32
   };
diff --git a/include/clang/AST/Stmt.h b/include/clang/AST/Stmt.h
index e48b7dcc28f5645f94778f5d366b970500fed8e5..d3950e92cf0d78dbccb1c38020909d8d3e604aad 100644 (file)
@@ -130,7 +130,7 @@ protected:
     friend class CharacterLiteral;
     unsigned : NumExprBits;
 
-    unsigned Kind : 2;
+    unsigned Kind : 3;
   };
 
   enum APFloatSemantics {
diff --git a/include/clang/Lex/LiteralSupport.h b/include/clang/Lex/LiteralSupport.h
index 5210e3f2e1c506158aeeb63154100fb480820243..d568614e2ae4e75b16f238c98acf0e76be9bf908 100644 (file)
@@ -166,6 +166,7 @@ public:
   bool hadError() const { return HadError; }
   bool isAscii() const { return Kind == tok::char_constant; }
   bool isWide() const { return Kind == tok::wide_char_constant; }
+  bool isUTF8() const { return Kind == tok::utf8_char_constant; }
   bool isUTF16() const { return Kind == tok::utf16_char_constant; }
   bool isUTF32() const { return Kind == tok::utf32_char_constant; }
   bool isMultiChar() const { return IsMultiChar; }
diff --git a/lib/AST/StmtPrinter.cpp b/lib/AST/StmtPrinter.cpp
index e55b2fc19a1a3d5b1db05892ec77ceb5d382cbfe..69f52f52b6694595c1f7a43b97d6f20017d7d56a 100644 (file)
@@ -1165,6 +1165,7 @@ void StmtPrinter::VisitCharacterLiteral(CharacterLiteral *Node) {
   switch (Node->getKind()) {
   case CharacterLiteral::Ascii: break; // no prefix.
   case CharacterLiteral::Wide:  OS << 'L'; break;
+  case CharacterLiteral::UTF8:  OS << "u8"; break;
   case CharacterLiteral::UTF16: OS << 'u'; break;
   case CharacterLiteral::UTF32: OS << 'U'; break;
   }
diff --git a/lib/Lex/LiteralSupport.cpp b/lib/Lex/LiteralSupport.cpp
index 1e7858af8948909be914c70b09cd7f8e6d3cce09..5b1c49344e8de4d3a867be01a5bda3f6e0b84bfc 100644 (file)
@@ -983,6 +983,7 @@ NumericLiteralParser::GetFloatValue(llvm::APFloat &Result) {
 ///         u' c-char-sequence '
 ///         U' c-char-sequence '
 ///         L' c-char-sequence '
+///         u8' c-char-sequence ' [C++1z lex.ccon]
 ///       c-char-sequence:
 ///         c-char
 ///         c-char-sequence c-char
diff --git a/lib/Sema/SemaExpr.cpp b/lib/Sema/SemaExpr.cpp
index 67d5db15cf2516837a5f55a921b95d5b22f55092..76d0ca56c0cb7d2b788088f7b58d3a9488a6899b 100644 (file)
@@ -3084,6 +3084,8 @@ ExprResult Sema::ActOnCharacterConstant(const Token &Tok, Scope *UDLScope) {
     Kind = CharacterLiteral::UTF16;
   else if (Literal.isUTF32())
     Kind = CharacterLiteral::UTF32;
+  else if (Literal.isUTF8())
+    Kind = CharacterLiteral::UTF8;
 
   Expr *Lit = new (Context) CharacterLiteral(Literal.getValue(), Kind, Ty,
                                              Tok.getLocation());
diff --git a/lib/Sema/SemaExprObjC.cpp b/lib/Sema/SemaExprObjC.cpp
index 57a08b94f5e8ff990273d8b7e2fb9b680777ed12..1d86ca35412e8d85988719c251857048444ba7a3 100644 (file)
@@ -319,6 +319,7 @@ ExprResult Sema::BuildObjCNumericLiteral(SourceLocation AtLoc, Expr *Number) {
     // to use to determine the Objective-c literal kind.
     switch (Char->getKind()) {
     case CharacterLiteral::Ascii:
+    case CharacterLiteral::UTF8:
       NumberType = Context.CharTy;
       break;
       
@@ -577,6 +578,7 @@ ExprResult Sema::BuildObjCBoxedExpr(SourceRange SR, Expr *ValueExpr) {
       // to use to determine the Objective-c literal kind.
       switch (Char->getKind()) {
       case CharacterLiteral::Ascii:
+      case CharacterLiteral::UTF8:
         ValueType = Context.CharTy;
         break;
         
diff --git a/lib/Sema/SemaTemplate.cpp b/lib/Sema/SemaTemplate.cpp
index 6cc85883345db9cde61aacbfdc3c2b69e96c7663..9775e4d940ab3d2161fd2a4b2996f31510271a73 100644 (file)
@@ -5503,6 +5503,8 @@ Sema::BuildExpressionFromIntegralTemplateArgument(const TemplateArgument &Arg,
 
   Expr *E;
   if (T->isAnyCharacterType()) {
+    // This does not need to handle u8 character literals because those are
+    // of type char, and so can also be covered by an ASCII character literal.
     CharacterLiteral::CharacterKind Kind;
     if (T->isWideCharType())
       Kind = CharacterLiteral::Wide;
diff --git a/lib/Serialization/ASTWriterDecl.cpp b/lib/Serialization/ASTWriterDecl.cpp
index 20ca6d6fd512115f4b63717560477cf53a570318..54bba282ab8deb7e363ec9b4f8c1ee4804af5e3f 100644 (file)
@@ -2033,7 +2033,7 @@ void ASTWriter::WriteDeclAbbrevs() {
   //Character Literal
   Abv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6)); // getValue
   Abv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6)); // Location
-  Abv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 2)); // getKind
+  Abv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 3)); // getKind
   CharacterLiteralAbbrev = Stream.EmitAbbrev(Abv);
 
   // Abbreviation for EXPR_IMPLICIT_CAST
diff --git a/test/Misc/ast-print-char-literal.cpp b/test/Misc/ast-print-char-literal.cpp
new file mode 100644 (file)
index 0000000..bb5daa2
--- /dev/null
@@ -0,0 +1,24 @@
+// RUN: %clang_cc1 -ast-print -std=c++1z %s -o - | FileCheck %s
+
+char c = u8'1';
+char d = '1';
+char e = U'1';
+char f = L'1';
+char g = u'1';
+
+template <char c = u8'1'>
+void h();
+
+void i() {
+  h<u8'2'>();
+}
+
+// CHECK: char c = u8'1';
+// CHECK-NEXT: char d = '1';
+// CHECK-NEXT: char e = U'1';
+// CHECK-NEXT: char f = L'1';
+// CHECK-NEXT: char g = u'1';
+
+// CHECK: template <char c = u8'1'>
+
+// CHECK: h<u8'2'>();
diff --git a/test/PCH/cxx-char-literal.cpp b/test/PCH/cxx-char-literal.cpp
new file mode 100644 (file)
index 0000000..0990517
--- /dev/null
@@ -0,0 +1,19 @@
+// RUN: %clang_cc1 -emit-pch -std=c++1z -o %t %s
+// RUN: %clang_cc1 -std=c++1z -x ast -ast-print %t | FileCheck %s
+
+// Ensure that character literals are properly surfaced through PCH.
+
+char a = '0';
+// CHECK: char a = '0';
+
+char b = L'1';
+// CHECK: char b = L'1';
+
+char c = u8'2';
+// CHECK: char c = u8'2';
+
+char d = U'3';
+// CHECK: char d = U'3';
+
+char e = u'4';
+// CHECK: char e = u'4';