Track that a character literal is UTF-8, and pretty-print its u8 prefix correctly.

author Aaron Ballman <aaron@aaronballman.com>

Thu, 7 Jan 2016 20:59:26 +0000 (20:59 +0000)

committer Aaron Ballman <aaron@aaronballman.com>

Thu, 7 Jan 2016 20:59:26 +0000 (20:59 +0000)
author Aaron Ballman <aaron@aaronballman.com>
Thu, 7 Jan 2016 20:59:26 +0000 (20:59 +0000)
committer Aaron Ballman <aaron@aaronballman.com>
Thu, 7 Jan 2016 20:59:26 +0000 (20:59 +0000)
diff --git a/include/clang/AST/Expr.h b/include/clang/AST/Expr.h

index 095dd6a1ef348f4a7f6c06b9ee9eb4ced9861ab9..38733eee82c361fe2fac4f8f44df20747e6cf430 100644 (file)
--- a/include/clang/AST/Expr.h
+++ b/include/clang/AST/Expr.h
@@ -1292,6 +1292,7 @@ public:
    enum CharacterKind {
      Ascii,
      Wide,
+    UTF8,
      UTF16,
      UTF32
    };
diff --git a/include/clang/AST/Stmt.h b/include/clang/AST/Stmt.h

index e48b7dcc28f5645f94778f5d366b970500fed8e5..d3950e92cf0d78dbccb1c38020909d8d3e604aad 100644 (file)
--- a/include/clang/AST/Stmt.h
+++ b/include/clang/AST/Stmt.h
@@ -130,7 +130,7 @@ protected:
      friend class CharacterLiteral;
      unsigned : NumExprBits;
  
-    unsigned Kind : 2;
+    unsigned Kind : 3;
    };
  
    enum APFloatSemantics {
diff --git a/include/clang/Lex/LiteralSupport.h b/include/clang/Lex/LiteralSupport.h

index 5210e3f2e1c506158aeeb63154100fb480820243..d568614e2ae4e75b16f238c98acf0e76be9bf908 100644 (file)
--- a/include/clang/Lex/LiteralSupport.h
+++ b/include/clang/Lex/LiteralSupport.h
@@ -166,6 +166,7 @@ public:
    bool hadError() const { return HadError; }
    bool isAscii() const { return Kind == tok::char_constant; }
    bool isWide() const { return Kind == tok::wide_char_constant; }
+  bool isUTF8() const { return Kind == tok::utf8_char_constant; }
    bool isUTF16() const { return Kind == tok::utf16_char_constant; }
    bool isUTF32() const { return Kind == tok::utf32_char_constant; }
    bool isMultiChar() const { return IsMultiChar; }
diff --git a/lib/AST/StmtPrinter.cpp b/lib/AST/StmtPrinter.cpp

index e55b2fc19a1a3d5b1db05892ec77ceb5d382cbfe..69f52f52b6694595c1f7a43b97d6f20017d7d56a 100644 (file)
--- a/lib/AST/StmtPrinter.cpp
+++ b/lib/AST/StmtPrinter.cpp
@@ -1165,6 +1165,7 @@ void StmtPrinter::VisitCharacterLiteral(CharacterLiteral *Node) {
    switch (Node->getKind()) {
    case CharacterLiteral::Ascii: break; // no prefix.
    case CharacterLiteral::Wide:  OS << 'L'; break;
+  case CharacterLiteral::UTF8:  OS << "u8"; break;
    case CharacterLiteral::UTF16: OS << 'u'; break;
    case CharacterLiteral::UTF32: OS << 'U'; break;
    }
diff --git a/lib/Lex/LiteralSupport.cpp b/lib/Lex/LiteralSupport.cpp

index 1e7858af8948909be914c70b09cd7f8e6d3cce09..5b1c49344e8de4d3a867be01a5bda3f6e0b84bfc 100644 (file)
--- a/lib/Lex/LiteralSupport.cpp
+++ b/lib/Lex/LiteralSupport.cpp
@@ -983,6 +983,7 @@ NumericLiteralParser::GetFloatValue(llvm::APFloat &Result) {
  ///         u' c-char-sequence '
  ///         U' c-char-sequence '
  ///         L' c-char-sequence '
+///         u8' c-char-sequence ' [C++1z lex.ccon]
  ///       c-char-sequence:
  ///         c-char
  ///         c-char-sequence c-char
diff --git a/lib/Sema/SemaExpr.cpp b/lib/Sema/SemaExpr.cpp

index 67d5db15cf2516837a5f55a921b95d5b22f55092..76d0ca56c0cb7d2b788088f7b58d3a9488a6899b 100644 (file)
--- a/lib/Sema/SemaExpr.cpp
+++ b/lib/Sema/SemaExpr.cpp
@@ -3084,6 +3084,8 @@ ExprResult Sema::ActOnCharacterConstant(const Token &Tok, Scope *UDLScope) {
      Kind = CharacterLiteral::UTF16;
    else if (Literal.isUTF32())
      Kind = CharacterLiteral::UTF32;
+  else if (Literal.isUTF8())
+    Kind = CharacterLiteral::UTF8;
  
    Expr *Lit = new (Context) CharacterLiteral(Literal.getValue(), Kind, Ty,
                                               Tok.getLocation());
diff --git a/lib/Sema/SemaExprObjC.cpp b/lib/Sema/SemaExprObjC.cpp

index 57a08b94f5e8ff990273d8b7e2fb9b680777ed12..1d86ca35412e8d85988719c251857048444ba7a3 100644 (file)
--- a/lib/Sema/SemaExprObjC.cpp
+++ b/lib/Sema/SemaExprObjC.cpp
@@ -319,6 +319,7 @@ ExprResult Sema::BuildObjCNumericLiteral(SourceLocation AtLoc, Expr *Number) {
      // to use to determine the Objective-c literal kind.
      switch (Char->getKind()) {
      case CharacterLiteral::Ascii:
+    case CharacterLiteral::UTF8:
        NumberType = Context.CharTy;
        break;
        
@@ -577,6 +578,7 @@ ExprResult Sema::BuildObjCBoxedExpr(SourceRange SR, Expr *ValueExpr) {
        // to use to determine the Objective-c literal kind.
        switch (Char->getKind()) {
        case CharacterLiteral::Ascii:
+      case CharacterLiteral::UTF8:
          ValueType = Context.CharTy;
          break;
          
diff --git a/lib/Sema/SemaTemplate.cpp b/lib/Sema/SemaTemplate.cpp

index 6cc85883345db9cde61aacbfdc3c2b69e96c7663..9775e4d940ab3d2161fd2a4b2996f31510271a73 100644 (file)
--- a/lib/Sema/SemaTemplate.cpp
+++ b/lib/Sema/SemaTemplate.cpp
@@ -5503,6 +5503,8 @@ Sema::BuildExpressionFromIntegralTemplateArgument(const TemplateArgument &Arg,
  
    Expr *E;
    if (T->isAnyCharacterType()) {
+    // This does not need to handle u8 character literals because those are
+    // of type char, and so can also be covered by an ASCII character literal.
      CharacterLiteral::CharacterKind Kind;
      if (T->isWideCharType())
        Kind = CharacterLiteral::Wide;
diff --git a/lib/Serialization/ASTWriterDecl.cpp b/lib/Serialization/ASTWriterDecl.cpp

index 20ca6d6fd512115f4b63717560477cf53a570318..54bba282ab8deb7e363ec9b4f8c1ee4804af5e3f 100644 (file)
--- a/lib/Serialization/ASTWriterDecl.cpp
+++ b/lib/Serialization/ASTWriterDecl.cpp
@@ -2033,7 +2033,7 @@ void ASTWriter::WriteDeclAbbrevs() {
    //Character Literal
    Abv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6)); // getValue
    Abv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6)); // Location
-  Abv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 2)); // getKind
+  Abv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 3)); // getKind
    CharacterLiteralAbbrev = Stream.EmitAbbrev(Abv);
  
    // Abbreviation for EXPR_IMPLICIT_CAST
diff --git a/test/Misc/ast-print-char-literal.cpp b/test/Misc/ast-print-char-literal.cpp

new file mode 100644 (file)

index 0000000..bb5daa2
--- /dev/null
+++ b/test/Misc/ast-print-char-literal.cpp
@@ -0,0 +1,24 @@
+// RUN: %clang_cc1 -ast-print -std=c++1z %s -o - | FileCheck %s
+
+char c = u8'1';
+char d = '1';
+char e = U'1';
+char f = L'1';
+char g = u'1';
+
+template <char c = u8'1'>
+void h();
+
+void i() {
+  h<u8'2'>();
+}
+
+// CHECK: char c = u8'1';
+// CHECK-NEXT: char d = '1';
+// CHECK-NEXT: char e = U'1';
+// CHECK-NEXT: char f = L'1';
+// CHECK-NEXT: char g = u'1';
+
+// CHECK: template <char c = u8'1'>
+
+// CHECK: h<u8'2'>();
diff --git a/test/PCH/cxx-char-literal.cpp b/test/PCH/cxx-char-literal.cpp

new file mode 100644 (file)

index 0000000..0990517
--- /dev/null
+++ b/test/PCH/cxx-char-literal.cpp
@@ -0,0 +1,19 @@
+// RUN: %clang_cc1 -emit-pch -std=c++1z -o %t %s
+// RUN: %clang_cc1 -std=c++1z -x ast -ast-print %t | FileCheck %s
+
+// Ensure that character literals are properly surfaced through PCH.
+
+char a = '0';
+// CHECK: char a = '0';
+
+char b = L'1';
+// CHECK: char b = L'1';
+
+char c = u8'2';
+// CHECK: char c = u8'2';
+
+char d = U'3';
+// CHECK: char d = U'3';
+
+char e = u'4';
+// CHECK: char e = u'4';
author	Aaron Ballman <aaron@aaronballman.com>
	Thu, 7 Jan 2016 20:59:26 +0000 (20:59 +0000)
committer	Aaron Ballman <aaron@aaronballman.com>
	Thu, 7 Jan 2016 20:59:26 +0000 (20:59 +0000)
include/clang/AST/Expr.h		patch \| blob \| history
include/clang/AST/Stmt.h		patch \| blob \| history
include/clang/Lex/LiteralSupport.h		patch \| blob \| history
lib/AST/StmtPrinter.cpp		patch \| blob \| history
lib/Lex/LiteralSupport.cpp		patch \| blob \| history
lib/Sema/SemaExpr.cpp		patch \| blob \| history
lib/Sema/SemaExprObjC.cpp		patch \| blob \| history
lib/Sema/SemaTemplate.cpp		patch \| blob \| history
lib/Serialization/ASTWriterDecl.cpp		patch \| blob \| history
test/Misc/ast-print-char-literal.cpp	[new file with mode: 0644]	patch \| blob
test/PCH/cxx-char-literal.cpp	[new file with mode: 0644]	patch \| blob