From: Sean Hunt <scshunt@csclub.uwaterloo.ca>
Date: Sun, 29 Aug 2010 21:26:48 +0000 (+0000)
Subject: Implement C++0x user-defined string literals.
X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=0016d519b831859526b79405cdae4c64c73731c8;p=clang

Implement C++0x user-defined string literals.

The extra data stored on user-defined literal Tokens lives in separately
allocated memory, which is owned by the Lexer because there isn't a better
place to put it that makes sure it gets deallocated, but only after the tokens
that refer to it are no longer in use. My testing has shown no significant
slowdown as a result, but independent testing would be appreciated.

git-svn-id: https://llvm.org/svn/llvm-project/cfe/trunk@112458 91177308-0d34-0410-b5e6-96231b3b80d8
---

diff --git a/include/clang/AST/ExprCXX.h b/include/clang/AST/ExprCXX.h
index 450348b9d2..1529b0ca13 100644
--- a/include/clang/AST/ExprCXX.h
+++ b/include/clang/AST/ExprCXX.h
@@ -2422,6 +2422,50 @@ public:
   virtual child_iterator child_end();
 };
 
+/// UDLiteralExpr - A call to a literal operator function that represents a
+/// user-defined literal (e.g. "foo"_bar) and remembers the underlying literal.
+///
+/// Both the DeclRefExpr and the IntegerConstant are fictional expressions
+/// generated from the literal.
+class UDLiteralExpr : public CallExpr {
+  Expr *BaseLiteral;  // The literal the ud-suffix was applied to.
+  // Only string, floating, integer and character literals may be wrapped.
+  static bool isValidLiteral(Expr *E) {
+    return isa<StringLiteral>(E) || isa<FloatingLiteral>(E) ||
+           isa<IntegerLiteral>(E) || isa<CharacterLiteral>(E);
+  }
+public:
+  UDLiteralExpr(ASTContext &C, Expr *E, Expr *fn, Expr **args,
+                unsigned numargs, QualType t)
+    : CallExpr(C, UDLiteralExprClass, fn, args, numargs, t, SourceLocation())
+    , BaseLiteral(E) {  // E is the base literal; the rest go to CallExpr.
+    assert(isValidLiteral(E) && "Base literal must be an actual literal");
+  }
+  /// The literal operator being invoked, i.e. this call's direct callee.
+  FunctionDecl *getLiteralOperator() { return getDirectCallee(); }
+  const FunctionDecl *getLiteralOperator() const { return getDirectCallee(); }
+  /// The literal expression this node was built from.
+  Expr *getBaseLiteral() { return BaseLiteral; }
+  const Expr *getBaseLiteral() const { return BaseLiteral; }
+  void setBaseLiteral(Expr *E) {
+    assert(isValidLiteral(E) && "Base literal must be an actual literal");
+    BaseLiteral = E;
+  }
+  /// The ud-suffix identifier, read from the literal operator's name.
+  IdentifierInfo *getUDSuffix() const {
+    return getLiteralOperator()->getDeclName().getCXXLiteralIdentifier();
+  }
+  /// The source range reported is that of the base literal.
+  virtual SourceRange getSourceRange() const {
+    return getBaseLiteral()->getSourceRange();
+  }
+
+  static bool classof(const Stmt *T) {
+    return T->getStmtClass() == UDLiteralExprClass;
+  }
+  static bool classof(const UDLiteralExpr *) { return true; }
+};
+
 inline ExplicitTemplateArgumentList &OverloadExpr::getExplicitTemplateArgs() {
   if (isa<UnresolvedLookupExpr>(this))
     return cast<UnresolvedLookupExpr>(this)->getExplicitTemplateArgs();
diff --git a/include/clang/AST/RecursiveASTVisitor.h b/include/clang/AST/RecursiveASTVisitor.h
index 79ac072fa1..a360cbd427 100644
--- a/include/clang/AST/RecursiveASTVisitor.h
+++ b/include/clang/AST/RecursiveASTVisitor.h
@@ -1694,6 +1694,7 @@ DEF_TRAVERSE_STMT(CXXTemporaryObjectExpr, {
 DEF_TRAVERSE_STMT(CallExpr, { })
 DEF_TRAVERSE_STMT(CXXMemberCallExpr, { })
 DEF_TRAVERSE_STMT(CXXOperatorCallExpr, { })
+DEF_TRAVERSE_STMT(UDLiteralExpr, { })
 
 // These operators (all of them) do not need any action except
 // iterating over the children.
diff --git a/include/clang/Basic/DiagnosticLexKinds.td b/include/clang/Basic/DiagnosticLexKinds.td
index 07f53d9867..2896a2f89e 100644
--- a/include/clang/Basic/DiagnosticLexKinds.td
+++ b/include/clang/Basic/DiagnosticLexKinds.td
@@ -99,6 +99,9 @@ def ext_string_too_long : Extension<"string literal of length %0 exceeds "
   "maximum length %1 that %select{C90|ISO C99|C++}2 compilers are required to "
   "support">, InGroup<OverlengthStrings>;
   
+def err_ud_suffix_mismatch : Error<"User-defined literal suffixes on adjacent "
+  "string literal tokens do not match">;
+
 //===----------------------------------------------------------------------===//
 // PTH Diagnostics
 //===----------------------------------------------------------------------===//
diff --git a/include/clang/Basic/DiagnosticSemaKinds.td b/include/clang/Basic/DiagnosticSemaKinds.td
index 32550dc616..e85ce4f483 100644
--- a/include/clang/Basic/DiagnosticSemaKinds.td
+++ b/include/clang/Basic/DiagnosticSemaKinds.td
@@ -2946,9 +2946,15 @@ def err_operator_delete_param_type : Error<
 // C++ literal operators
 def err_literal_operator_outside_namespace : Error<
   "literal operator %0 must be in a namespace or global scope">;
-// FIXME: This diagnostic sucks
+def warn_literal_operator_no_underscore : Warning< "literal operator names not "
+  "beginning with underscores are reserved for future standardization">;
+def err_literal_operator_overload : Error<
+  "no matching literal operator function for user-defined suffix '%0'">;
+def err_literal_operator_deleted : Error<
+  "deleted literal operator function for user-defined suffix '%0'">;
+// FIXME: This should really provide information about what is allowed.
 def err_literal_operator_params : Error<
-  "parameter declaration for literal operator %0 is not valid">;
+  "parameter declaration for literal operator '%0' is not valid">;
 
 // C++ conversion functions
 def err_conv_function_not_member : Error<
diff --git a/include/clang/Basic/StmtNodes.td b/include/clang/Basic/StmtNodes.td
index a2f69730a0..2b446c674a 100644
--- a/include/clang/Basic/StmtNodes.td
+++ b/include/clang/Basic/StmtNodes.td
@@ -112,6 +112,9 @@ def OverloadExpr : DStmt<Expr, 1>;
 def UnresolvedLookupExpr : DStmt<OverloadExpr>;
 def UnresolvedMemberExpr : DStmt<OverloadExpr>;
 
+// C++0x expressions.  UDLiteralExpr derives from CallExpr (see ExprCXX.h).
+def UDLiteralExpr : DStmt<CallExpr>;
+
 // Obj-C Expressions.
 def ObjCStringLiteral : DStmt<Expr>;
 def ObjCEncodeExpr : DStmt<Expr>;
diff --git a/include/clang/Lex/Lexer.h b/include/clang/Lex/Lexer.h
index 9e0fb7ee70..e05113da76 100644
--- a/include/clang/Lex/Lexer.h
+++ b/include/clang/Lex/Lexer.h
@@ -17,6 +17,7 @@
 #include "clang/Lex/PreprocessorLexer.h"
 #include "clang/Basic/LangOptions.h"
 #include "llvm/ADT/SmallVector.h"
+#include "llvm/Support/Allocator.h"
 #include <string>
 #include <vector>
 #include <cassert>
@@ -67,6 +68,9 @@ class Lexer : public PreprocessorLexer {
   // line" flag set on it.
   bool IsAtStartOfLine;
 
+  // ExtraDataAllocator - An allocator for extra data on a token.
+  llvm::BumpPtrAllocator ExtraDataAllocator;
+
   Lexer(const Lexer&);          // DO NOT IMPLEMENT
   void operator=(const Lexer&); // DO NOT IMPLEMENT
   friend class Preprocessor;
diff --git a/include/clang/Lex/LiteralSupport.h b/include/clang/Lex/LiteralSupport.h
index ba46fb18a6..40112f5348 100644
--- a/include/clang/Lex/LiteralSupport.h
+++ b/include/clang/Lex/LiteralSupport.h
@@ -27,6 +27,7 @@ class Preprocessor;
 class Token;
 class SourceLocation;
 class TargetInfo;
+class IdentifierInfo;
 
 /// NumericLiteralParser - This performs strict semantic analysis of the content
 /// of a ppnumber, classifying it as either integer, floating, or erroneous,
@@ -145,6 +146,7 @@ class StringLiteralParser {
   unsigned wchar_tByteWidth;
   llvm::SmallString<512> ResultBuf;
   char *ResultPtr; // cursor
+  IdentifierInfo *UDSuffix;
 public:
   StringLiteralParser(const Token *StringToks, unsigned NumStringToks,
                       Preprocessor &PP, bool Complain = true);
@@ -155,6 +157,9 @@ public:
   const char *GetString() { return &ResultBuf[0]; }
   unsigned GetStringLength() const { return ResultPtr-&ResultBuf[0]; }
 
+  bool isUserDefinedLiteral() const { return UDSuffix; }
+  IdentifierInfo *getUDSuffix() const { return UDSuffix; }
+
   unsigned GetNumStringChars() const {
     if (AnyWide)
       return GetStringLength() / wchar_tByteWidth;
diff --git a/include/clang/Lex/Preprocessor.h b/include/clang/Lex/Preprocessor.h
index 6b9b89ea5e..94e858dfb0 100644
--- a/include/clang/Lex/Preprocessor.h
+++ b/include/clang/Lex/Preprocessor.h
@@ -645,7 +645,7 @@ public:
   /// copy).  The caller is not allowed to modify the returned buffer pointer
   /// if an internal buffer is returned.
   unsigned getSpelling(const Token &Tok, const char *&Buffer, 
-                       bool *Invalid = 0) const;
+                       bool *Invalid = 0, bool LiteralOnly = false) const;
 
   /// getSpelling - This method is used to get the spelling of a token into a
   /// SmallVector. Note that the returned StringRef may not point to the
diff --git a/include/clang/Lex/Token.h b/include/clang/Lex/Token.h
index bd9b46869a..e6f7c7c3f5 100644
--- a/include/clang/Lex/Token.h
+++ b/include/clang/Lex/Token.h
@@ -14,16 +14,16 @@
 #ifndef LLVM_CLANG_TOKEN_H
 #define LLVM_CLANG_TOKEN_H
 
+#include "llvm/Support/Allocator.h"
 #include "clang/Basic/TemplateKinds.h"
 #include "clang/Basic/TokenKinds.h"
 #include "clang/Basic/SourceLocation.h"
 #include "clang/Basic/OperatorKinds.h"
+#include "clang/Basic/IdentifierTable.h"
 #include <cstdlib>
 
 namespace clang {
 
-class IdentifierInfo;
-
 /// Token - This structure provides full information about a lexed token.
 /// It is not intended to be space efficient, it is intended to return as much
 /// information as possible about each returned token.  This is expected to be
@@ -34,6 +34,14 @@ class IdentifierInfo;
 /// can be represented by a single typename annotation token that carries
 /// information about the SourceRange of the tokens and the type object.
 class Token {
+  /// An extra-large structure for storing the data needed for a user-defined
+  /// literal - the raw literal, and the identifier suffix.
+  struct UDLData {
+    IdentifierInfo *II;
+    const char *LiteralData;
+    unsigned LiteralLength;
+  };
+
   /// The location of the token.
   SourceLocation Loc;
 
@@ -47,7 +55,7 @@ class Token {
   /// token.
   unsigned UintData;
 
-  /// PtrData - This is a union of four different pointer types, which depends
+  /// PtrData - This is a union of five different pointer types, which depends
   /// on what type of token this is:
   ///  Identifiers, keywords, etc:
   ///    This is an IdentifierInfo*, which contains the uniqued identifier
@@ -55,6 +63,8 @@ class Token {
   ///  Literals:  isLiteral() returns true.
   ///    This is a pointer to the start of the token in a text buffer, which
   ///    may be dirty (have trigraphs / escaped newlines).
+  ///  User-defined literals: isUserDefinedLiteral() returns true.
+  ///    This is a pointer to a UDLData.
   ///  Annotations (resolved type names, C++ scopes, etc): isAnnotation().
   ///    This is a pointer to sema-specific data for the annotation token.
   ///  Other:
@@ -71,12 +81,14 @@ class Token {
   unsigned char Flags;
 public:
 
-  // Various flags set per token:
+  /// Various flags set per token:
   enum TokenFlags {
-    StartOfLine   = 0x01,  // At start of line or only after whitespace.
-    LeadingSpace  = 0x02,  // Whitespace exists before this token.
-    DisableExpand = 0x04,  // This identifier may never be macro expanded.
-    NeedsCleaning = 0x08   // Contained an escaped newline or trigraph.
+    StartOfLine   =       0x01,  ///< At start of line or only after whitespace
+    LeadingSpace  =       0x02,  ///< Whitespace exists before this token
+    DisableExpand =       0x04,  ///< This identifier may never be macro expanded
+    NeedsCleaning =       0x08,  ///< Contained an escaped newline or trigraph
+    UserDefinedLiteral =  0x10,  ///< This literal has a ud-suffix
+    LiteralPortionClean = 0x20   ///< A UDL's literal portion needs no cleaning
   };
 
   tok::TokenKind getKind() const { return (tok::TokenKind)Kind; }
@@ -108,12 +120,34 @@ public:
     assert(!isAnnotation() && "Annotation tokens have no length field");
     return UintData;
   }
+  /// getLiteralLength - Length of just the literal part of this token.  For a
+  /// user-defined literal this is kept separately from the token length.
+  unsigned getLiteralLength() const {
+    assert(isLiteral() && "Using getLiteralLength on a non-literal token");
+    if (!isUserDefinedLiteral())
+      return UintData;
+    const UDLData *Data = static_cast<const UDLData*>(PtrData);
+    return Data->LiteralLength;
+  }
 
   void setLocation(SourceLocation L) { Loc = L; }
   void setLength(unsigned Len) {
     assert(!isAnnotation() && "Annotation tokens have no length field");
     UintData = Len;
   }
+  void setLiteralLength(unsigned Len) {
+    assert(isLiteral() && "Using setLiteralLength on a non-literal token");
+    // A UDL stores the literal length in its UDLData; others use UintData.
+    unsigned &Slot = isUserDefinedLiteral()
+      ? static_cast<UDLData*>(PtrData)->LiteralLength : UintData;
+    Slot = Len;
+  }
+
+  /// makeUserDefinedLiteral - Set this token to be a user-defined literal
+  void makeUserDefinedLiteral(llvm::BumpPtrAllocator &Alloc) {
+    PtrData = Alloc.Allocate<UDLData>();  // sized and aligned for UDLData
+    setFlag(UserDefinedLiteral);
+  }
 
   SourceLocation getAnnotationEndLoc() const {
     assert(isAnnotation() && "Used AnnotEndLocID on non-annotation token");
@@ -154,11 +188,18 @@ public:
 
   IdentifierInfo *getIdentifierInfo() const {
     assert(!isAnnotation() && "Used IdentInfo on annotation token!");
-    if (isLiteral()) return 0;
-    return (IdentifierInfo*) PtrData;
+    if (isUserDefinedLiteral())
+      return reinterpret_cast<UDLData*>(PtrData)->II;
+    else if (isLiteral())
+      return 0;
+    else
+      return reinterpret_cast<IdentifierInfo*>(PtrData);
   }
   void setIdentifierInfo(IdentifierInfo *II) {
-    PtrData = (void*) II;
+    if (isUserDefinedLiteral())
+      reinterpret_cast<UDLData*>(PtrData)->II = II;
+    else
+      PtrData = (void*)II;
   }
 
   /// getLiteralData - For a literal token (numeric constant, string, etc), this
@@ -166,11 +207,17 @@ public:
   /// otherwise.
   const char *getLiteralData() const {
     assert(isLiteral() && "Cannot get literal data of non-literal");
-    return reinterpret_cast<const char*>(PtrData);
+    if (isUserDefinedLiteral())
+      return reinterpret_cast<UDLData*>(PtrData)->LiteralData;
+    else
+      return reinterpret_cast<const char*>(PtrData);
   }
   void setLiteralData(const char *Ptr) {
     assert(isLiteral() && "Cannot set literal data of non-literal");
-    PtrData = const_cast<char*>(Ptr);
+    if (isUserDefinedLiteral())
+      reinterpret_cast<UDLData*>(PtrData)->LiteralData = Ptr;
+    else
+      PtrData = const_cast<char*>(Ptr);
   }
 
   void *getAnnotationValue() const {
@@ -221,6 +268,12 @@ public:
     return (Flags & DisableExpand) ? true : false;
   }
 
+  /// isUserDefinedLiteral - True when the UserDefinedLiteral flag is set on
+  /// this token, i.e. it is a C++0x literal that has a ud-suffix.
+  bool isUserDefinedLiteral() const {
+    return (Flags & UserDefinedLiteral) != 0;
+  }
+
   /// isObjCAtKeyword - Return true if we have an ObjC keyword identifier.
   bool isObjCAtKeyword(tok::ObjCKeywordKind objcKey) const;
 
@@ -229,8 +282,17 @@ public:
 
   /// needsCleaning - Return true if this token has trigraphs or escaped
   /// newlines in it.
-  ///
-  bool needsCleaning() const { return (Flags & NeedsCleaning) ? true : false; }
+  bool needsCleaning() const {
+    return (Flags & NeedsCleaning) != 0;
+  }
+
+  /// literalNeedsCleaning - Return true when the token is flagged as needing
+  /// cleaning and its literal portion has not been flagged as clean.
+  bool literalNeedsCleaning() const {
+    assert(isLiteral() && "Using literalNeedsCleaning on a non-literal token");
+    const unsigned Mask = NeedsCleaning | LiteralPortionClean;
+    return (Flags & Mask) == NeedsCleaning;
+  }
 };
 
 /// PPConditionalInfo - Information about the conditional stack (#if directives)
diff --git a/include/clang/Sema/Sema.h b/include/clang/Sema/Sema.h
index 5558eae4da..d8c8b02499 100644
--- a/include/clang/Sema/Sema.h
+++ b/include/clang/Sema/Sema.h
@@ -1752,8 +1752,8 @@ public:
 
   /// ActOnStringLiteral - The specified tokens were lexed as pasted string
   /// fragments (e.g. "foo" "bar" L"baz").
-  virtual ExprResult ActOnStringLiteral(const Token *Toks,
-                                              unsigned NumToks);
+  virtual ExprResult ActOnStringLiteral(Scope *S, const Token *Toks,
+                                        unsigned NumToks);
 
   // Binary/Unary Operators.  'Tok' is the token for the operator.
   ExprResult CreateBuiltinUnaryOp(SourceLocation OpLoc,
@@ -2747,6 +2747,9 @@ public:
 
   bool CheckLiteralOperatorDeclaration(FunctionDecl *FnDecl);
 
+  ExprResult BuildUDStringLiteralExpr(Scope *S, StringLiteral *SL, unsigned L,
+                                      IdentifierInfo *II);
+
   //===--------------------------------------------------------------------===//
   // C++ Templates [C++ 14]
   //
diff --git a/lib/AST/ExprConstant.cpp b/lib/AST/ExprConstant.cpp
index 14cbbaf0e8..d067668f4b 100644
--- a/lib/AST/ExprConstant.cpp
+++ b/lib/AST/ExprConstant.cpp
@@ -2423,7 +2423,8 @@ static ICEDiag CheckICE(const Expr* E, ASTContext &Ctx) {
   case Expr::UnaryTypeTraitExprClass:
     return NoDiag();
   case Expr::CallExprClass:
-  case Expr::CXXOperatorCallExprClass: {
+  case Expr::CXXOperatorCallExprClass:
+  case Expr::UDLiteralExprClass: {
     const CallExpr *CE = cast<CallExpr>(E);
     if (CE->isBuiltinCall(Ctx))
       return CheckEvalInICE(E, Ctx);
diff --git a/lib/AST/StmtPrinter.cpp b/lib/AST/StmtPrinter.cpp
index ea528c2dae..084bdb3ed6 100644
--- a/lib/AST/StmtPrinter.cpp
+++ b/lib/AST/StmtPrinter.cpp
@@ -1199,6 +1199,11 @@ void StmtPrinter::VisitUnresolvedMemberExpr(UnresolvedMemberExpr *Node) {
   }
 }
 
+void StmtPrinter::VisitUDLiteralExpr(UDLiteralExpr *Node) {
+  PrintExpr(Node->getBaseLiteral());  // dispatch to the literal's own printer
+  OS << Node->getUDSuffix()->getName();  // then append the ud-suffix
+}
+
 static const char *getTypeTraitName(UnaryTypeTrait UTT) {
   switch (UTT) {
   default: assert(false && "Unknown type trait");
diff --git a/lib/AST/StmtProfile.cpp b/lib/AST/StmtProfile.cpp
index 098aec0a19..79454748dd 100644
--- a/lib/AST/StmtProfile.cpp
+++ b/lib/AST/StmtProfile.cpp
@@ -828,6 +828,12 @@ void StmtProfiler::VisitUnresolvedMemberExpr(UnresolvedMemberExpr *S) {
     VisitTemplateArguments(S->getTemplateArgs(), S->getNumTemplateArgs());
 }
 
+void StmtProfiler::VisitUDLiteralExpr(UDLiteralExpr *S) {
+  VisitExpr(S);
+  Visit(S->getBaseLiteral());  // dispatch to the literal's own Visit* method
+  ID.AddString(S->getUDSuffix()->getName());
+}
+
 void StmtProfiler::VisitObjCStringLiteral(ObjCStringLiteral *S) {
   VisitExpr(S);
 }
diff --git a/lib/Checker/GRExprEngine.cpp b/lib/Checker/GRExprEngine.cpp
index c9173aa92a..059749d16f 100644
--- a/lib/Checker/GRExprEngine.cpp
+++ b/lib/Checker/GRExprEngine.cpp
@@ -886,6 +886,7 @@ void GRExprEngine::Visit(const Stmt* S, ExplodedNode* Pred,
     }
 
     case Stmt::CallExprClass:
+    case Stmt::UDLiteralExprClass:
     case Stmt::CXXOperatorCallExprClass: {
       const CallExpr* C = cast<CallExpr>(S);
       VisitCall(C, Pred, C->arg_begin(), C->arg_end(), Dst, false);
diff --git a/lib/CodeGen/Mangle.cpp b/lib/CodeGen/Mangle.cpp
index c06b4fc699..51b07a1327 100644
--- a/lib/CodeGen/Mangle.cpp
+++ b/lib/CodeGen/Mangle.cpp
@@ -1634,6 +1634,7 @@ void CXXNameMangler::mangleExpression(const Expr *E, unsigned Arity) {
     break;
 
   case Expr::CXXMemberCallExprClass: // fallthrough
+  case Expr::UDLiteralExprClass:
   case Expr::CallExprClass: {
     const CallExpr *CE = cast<CallExpr>(E);
     Out << "cl";
diff --git a/lib/Lex/Lexer.cpp b/lib/Lex/Lexer.cpp
index 6cd1873e28..b4cafb49f8 100644
--- a/lib/Lex/Lexer.cpp
+++ b/lib/Lex/Lexer.cpp
@@ -548,6 +548,11 @@ static void InitCharacterInfo() {
   isInited = true;
 }
 
+/// isIdentifierStart - Return true if c may begin an identifier, that is, if
+/// it is a letter or an underscore ([a-zA-Z_]).
+static inline bool isIdentifierStart(unsigned char c) {
+  return (CharInfo[c] & (CHAR_LETTER|CHAR_UNDER)) != 0;
+}
 
 /// isIdentifierBody - Return true if this is the body character of an
 /// identifier, which is [a-zA-Z0-9_].
@@ -982,8 +987,30 @@ void Lexer::LexStringLiteral(Token &Result, const char *CurPtr, bool Wide) {
 
   // Update the location of the token as well as the BufferPtr instance var.
   const char *TokStart = BufferPtr;
-  FormTokenWithChars(Result, CurPtr,
-                     Wide ? tok::wide_string_literal : tok::string_literal);
+  tok::TokenKind Kind = Wide ? tok::wide_string_literal : tok::string_literal;
+
+  // FIXME: Handle UCNs
+  unsigned Size;
+  if (PP && PP->getLangOptions().CPlusPlus0x &&
+      isIdentifierStart(getCharAndSize(CurPtr, Size))) {
+    Result.makeUserDefinedLiteral(ExtraDataAllocator);
+    Result.setFlagValue(Token::LiteralPortionClean, !Result.needsCleaning());
+    Result.setKind(Kind);
+    Result.setLiteralLength(CurPtr - BufferPtr);
+
+    // FIXME: We hack around the lexer's routines a lot here.
+    BufferPtr = CurPtr;
+    bool OldRawMode = LexingRawMode;
+    LexingRawMode = true;
+    LexIdentifier(Result, ConsumeChar(CurPtr, Size, Result));
+    LexingRawMode = OldRawMode;
+    PP->LookUpIdentifierInfo(Result, CurPtr);
+
+    CurPtr = BufferPtr;
+    BufferPtr = TokStart;
+  }
+
+  FormTokenWithChars(Result, CurPtr, Kind);
   Result.setLiteralData(TokStart);
 }
 
diff --git a/lib/Lex/LiteralSupport.cpp b/lib/Lex/LiteralSupport.cpp
index a12c4ae0d4..eb7337a565 100644
--- a/lib/Lex/LiteralSupport.cpp
+++ b/lib/Lex/LiteralSupport.cpp
@@ -758,30 +758,38 @@ CharLiteralParser::CharLiteralParser(const char *begin, const char *end,
 ///
 StringLiteralParser::
 StringLiteralParser(const Token *StringToks, unsigned NumStringToks,
-                    Preprocessor &pp, bool Complain) : PP(pp) {
+                    Preprocessor &pp, bool Complain) : PP(pp), hadError(false) {
   // Scan all of the string portions, remember the max individual token length,
   // computing a bound on the concatenated string length, and see whether any
   // piece is a wide-string.  If any of the string portions is a wide-string
   // literal, the result is a wide-string literal [C99 6.4.5p4].
-  MaxTokenLength = StringToks[0].getLength();
-  SizeBound = StringToks[0].getLength()-2;  // -2 for "".
+  MaxTokenLength = StringToks[0].getLiteralLength();
+  SizeBound = StringToks[0].getLiteralLength()-2;  // -2 for "".
   AnyWide = StringToks[0].is(tok::wide_string_literal);
-
-  hadError = false;
+  UDSuffix = StringToks[0].getIdentifierInfo();
 
   // Implement Translation Phase #6: concatenation of string literals
   /// (C99 5.1.1.2p1).  The common case is only one string fragment.
   for (unsigned i = 1; i != NumStringToks; ++i) {
     // The string could be shorter than this if it needs cleaning, but this is a
     // reasonable bound, which is all we need.
-    SizeBound += StringToks[i].getLength()-2;  // -2 for "".
+    SizeBound += StringToks[i].getLiteralLength()-2;  // -2 for "".
 
     // Remember maximum string piece length.
-    if (StringToks[i].getLength() > MaxTokenLength)
-      MaxTokenLength = StringToks[i].getLength();
+    if (StringToks[i].getLiteralLength() > MaxTokenLength)
+      MaxTokenLength = StringToks[i].getLiteralLength();
 
     // Remember if we see any wide strings.
     AnyWide |= StringToks[i].is(tok::wide_string_literal);
+
+    if (StringToks[i].isUserDefinedLiteral()) {
+      if (UDSuffix && UDSuffix != StringToks[i].getIdentifierInfo()) {
+        // Report at the offending token.  FIXME: note the previous suffix
+        PP.Diag(StringToks[i].getLocation(), diag::err_ud_suffix_mismatch);
+        hadError = true;
+      } else if (!UDSuffix)
+        UDSuffix = StringToks[i].getIdentifierInfo();  // first suffix seen
+    }
   }
 
   // Include space for the null terminator.
@@ -823,7 +831,7 @@ StringLiteralParser(const Token *StringToks, unsigned NumStringToks,
     // and 'spelled' tokens can only shrink.
     bool StringInvalid = false;
     unsigned ThisTokLen = PP.getSpelling(StringToks[i], ThisTokBuf, 
-                                         &StringInvalid);
+                                         &StringInvalid, true);
     if (StringInvalid) {
       hadError = 1;
       continue;
@@ -938,7 +946,7 @@ unsigned StringLiteralParser::getOffsetOfStringByte(const Token &Tok,
                                                     bool Complain) {
   // Get the spelling of the token.
   llvm::SmallString<16> SpellingBuffer;
-  SpellingBuffer.resize(Tok.getLength());
+  SpellingBuffer.resize(Tok.getLiteralLength());
 
   bool StringInvalid = false;
   const char *SpellingPtr = &SpellingBuffer[0];
diff --git a/lib/Lex/Preprocessor.cpp b/lib/Lex/Preprocessor.cpp
index 5160acf19e..f52d35494a 100644
--- a/lib/Lex/Preprocessor.cpp
+++ b/lib/Lex/Preprocessor.cpp
@@ -352,15 +352,25 @@ std::string Preprocessor::getSpelling(const Token &Tok, bool *Invalid) const {
 /// to point to a constant buffer with the data already in it (avoiding a
 /// copy).  The caller is not allowed to modify the returned buffer pointer
 /// if an internal buffer is returned.
-unsigned Preprocessor::getSpelling(const Token &Tok,
-                                   const char *&Buffer, bool *Invalid) const {
+///
+/// If LiteralOnly is specified, only the literal portion of the token is
+/// processed.
+unsigned Preprocessor::getSpelling(const Token &Tok, const char *&Buffer,
+                                   bool *Invalid, bool LiteralOnly) const {
   assert((int)Tok.getLength() >= 0 && "Token character range is bogus!");
+  assert((!LiteralOnly || Tok.isLiteral()) &&
+         "LiteralOnly used on a non-literal token");
+
+  unsigned (Token::*getLength) () const =
+    LiteralOnly ? &Token::getLiteralLength : &Token::getLength;
 
   // If this token is an identifier, just return the string from the identifier
   // table, which is very quick.
   if (const IdentifierInfo *II = Tok.getIdentifierInfo()) {
-    Buffer = II->getNameStart();
-    return II->getLength();
+    if (!Tok.isUserDefinedLiteral()) {
+      Buffer = II->getNameStart();
+      return II->getLength();
+    }
   }
 
   // Otherwise, compute the start of the token in the input lexer buffer.
@@ -381,20 +391,20 @@ unsigned Preprocessor::getSpelling(const Token &Tok,
   }
 
   // If this token contains nothing interesting, return it directly.
-  if (!Tok.needsCleaning()) {
+  if (!(LiteralOnly ? Tok.literalNeedsCleaning() : Tok.needsCleaning())) {
     Buffer = TokStart;
-    return Tok.getLength();
+    return (Tok.*getLength)();
   }
 
   // Otherwise, hard case, relex the characters into the string.
   char *OutBuf = const_cast<char*>(Buffer);
-  for (const char *Ptr = TokStart, *End = TokStart+Tok.getLength();
+  for (const char *Ptr = TokStart, *End = TokStart+(Tok.*getLength)();
        Ptr != End; ) {
     unsigned CharSize;
     *OutBuf++ = Lexer::getCharAndSizeNoWarn(Ptr, CharSize, Features);
     Ptr += CharSize;
   }
-  assert(unsigned(OutBuf-Buffer) != Tok.getLength() &&
+  assert(unsigned(OutBuf-Buffer) != (Tok.*getLength)() &&
          "NeedsCleaning flag set on something that didn't need cleaning!");
 
   return OutBuf-Buffer;
diff --git a/lib/Parse/ParseExpr.cpp b/lib/Parse/ParseExpr.cpp
index 290b72c4c0..2ade77a907 100644
--- a/lib/Parse/ParseExpr.cpp
+++ b/lib/Parse/ParseExpr.cpp
@@ -1568,7 +1568,8 @@ ExprResult Parser::ParseStringLiteralExpression() {
   } while (isTokenStringLiteral());
 
   // Pass the set of string tokens, ready for concatenation, to the actions.
-  return Actions.ActOnStringLiteral(&StringToks[0], StringToks.size());
+  return Actions.ActOnStringLiteral(getCurScope(), &StringToks[0],
+                                    StringToks.size());
 }
 
 /// ParseExpressionList - Used for C/C++ (argument-)expression-list.
diff --git a/lib/Parse/ParseStmt.cpp b/lib/Parse/ParseStmt.cpp
index af927285a4..94b8c3baac 100644
--- a/lib/Parse/ParseStmt.cpp
+++ b/lib/Parse/ParseStmt.cpp
@@ -1244,11 +1244,12 @@ StmtResult Parser::FuzzyParseMicrosoftAsmStatement() {
              Tok.isNot(tok::eof));
   }
   Token t;
+  t.startToken();
   t.setKind(tok::string_literal);
   t.setLiteralData("\"/*FIXME: not done*/\"");
   t.clearFlag(Token::NeedsCleaning);
   t.setLength(21);
-  ExprResult AsmString(Actions.ActOnStringLiteral(&t, 1));
+  ExprResult AsmString(Actions.ActOnStringLiteral(getCurScope(), &t, 1));
   ExprVector Constraints(Actions);
   ExprVector Exprs(Actions);
   ExprVector Clobbers(Actions);
diff --git a/lib/Sema/SemaDeclCXX.cpp b/lib/Sema/SemaDeclCXX.cpp
index 35920c2792..4abeaaf246 100644
--- a/lib/Sema/SemaDeclCXX.cpp
+++ b/lib/Sema/SemaDeclCXX.cpp
@@ -5932,6 +5932,9 @@ bool Sema::CheckLiteralOperatorDeclaration(FunctionDecl *FnDecl) {
     return true;
   }
 
+  if (FnDecl->getDeclName().getCXXLiteralIdentifier()->getName()[0] != '_')
+    Diag(FnDecl->getLocation(), diag::warn_literal_operator_no_underscore);
+
   bool Valid = false;
 
   // template <char...> type operator "" name() is the only valid template
diff --git a/lib/Sema/SemaExpr.cpp b/lib/Sema/SemaExpr.cpp
index 1a065eb63c..4d0ac90d39 100644
--- a/lib/Sema/SemaExpr.cpp
+++ b/lib/Sema/SemaExpr.cpp
@@ -375,7 +375,8 @@ QualType Sema::UsualArithmeticConversions(Expr *&lhsExpr, Expr *&rhsExpr,
 /// string.
 ///
 ExprResult
-Sema::ActOnStringLiteral(const Token *StringToks, unsigned NumStringToks) {
+Sema::ActOnStringLiteral(Scope *S, const Token *StringToks,
+                         unsigned NumStringToks) {
   assert(NumStringToks && "Must have at least one string!");
 
   StringLiteralParser Literal(StringToks, NumStringToks, PP);
@@ -401,12 +402,18 @@ Sema::ActOnStringLiteral(const Token *StringToks, unsigned NumStringToks) {
                                  llvm::APInt(32, Literal.GetNumStringChars()+1),
                                        ArrayType::Normal, 0);
 
+  StringLiteral *SL = StringLiteral::Create(Context, Literal.GetString(),
+                                           Literal.GetStringLength(),
+                                           Literal.AnyWide, StrTy,
+                                           &StringTokLocs[0],
+                                           StringTokLocs.size());
+
+  if (Literal.isUserDefinedLiteral())
+    return BuildUDStringLiteralExpr(S, SL, Literal.GetNumStringChars(),
+                                    Literal.getUDSuffix());
+
   // Pass &StringTokLocs[0], StringTokLocs.size() to factory!
-  return Owned(StringLiteral::Create(Context, Literal.GetString(),
-                                     Literal.GetStringLength(),
-                                     Literal.AnyWide, StrTy,
-                                     &StringTokLocs[0],
-                                     StringTokLocs.size()));
+  return Owned(SL);
 }
 
 /// ShouldSnapshotBlockValueReference - Return true if a reference inside of
diff --git a/lib/Sema/SemaExprCXX.cpp b/lib/Sema/SemaExprCXX.cpp
index 5720d931b6..344196cc6f 100644
--- a/lib/Sema/SemaExprCXX.cpp
+++ b/lib/Sema/SemaExprCXX.cpp
@@ -3057,3 +3057,76 @@ ExprResult Sema::ActOnFinishFullExpr(Expr *FullExpr) {
   if (!FullExpr) return ExprError();
   return MaybeCreateCXXExprWithTemporaries(FullExpr);
 }
+
+/// BuildUDStringLiteralExpr - Build the call to the literal operator for a
+/// user-defined string literal (e.g. "foo"_bar).
+///
+/// The literal operator named by the suffix \p II is looked up starting from
+/// scope \p S and chosen by overload resolution on two arguments: the string
+/// literal \p SL and a synthesized IntegerLiteral of type size_t with value
+/// \p L. Returns the resulting UDLiteralExpr (via MaybeBindToTemporary), or
+/// ExprError() after a diagnostic when overload resolution does not succeed.
+ExprResult Sema::BuildUDStringLiteralExpr(Scope *S, StringLiteral *SL,
+                                          unsigned L, IdentifierInfo *II) {
+  DeclarationName DN = Context.DeclarationNames.getCXXLiteralOperatorName(II);
+
+  LookupResult R(*this, DN, SL->getLocStart(),  LookupOrdinaryName);
+  LookupName(R, S);
+
+  // The synthesized length argument is as wide as size_t.
+  llvm::APInt APL(Context.getTypeSize(Context.getSizeType()), L);
+
+  Expr *Args[2];
+  Args[0] = SL;
+  Args[1] = new (Context) IntegerLiteral(Context, APL, Context.getSizeType(),
+                                         SourceLocation());
+
+  OverloadCandidateSet CandidateSet(SL->getLocStart());
+  AddFunctionCandidates(R.asUnresolvedSet(), Args, 2, CandidateSet);
+  OverloadCandidateSet::iterator Best;
+  switch (CandidateSet.BestViableFunction(*this, SL->getLocStart(), Best)) {
+    case OR_Ambiguous:
+      // Reachable from user code, e.g. when using-directives make identically
+      // declared literal operators from different namespaces visible at the
+      // same time, so diagnose instead of asserting.
+      Diag(SL->getLocStart(), diag::err_ovl_ambiguous_call)
+        << DN << SL->getSourceRange();
+      //FIXME: Note the ambiguous candidates
+      return ExprError();
+    case OR_No_Viable_Function:
+      Diag(SL->getLocStart(), diag::err_literal_operator_overload)
+        << SL->getSourceRange() << II->getName();
+      return ExprError();
+    case OR_Deleted:
+      Diag(SL->getLocStart(), diag::err_literal_operator_deleted)
+        << SL->getSourceRange() << II->getName();
+      //FIXME: Note the deleted function
+      return ExprError();
+    case OR_Success:
+      break;
+  }
+
+  assert(Best->Function && "Literal operator function not a real function");
+  FunctionDecl *FD = Best->Function;
+
+  // Convert the string literal to the operator's first parameter type.
+  ExprResult InputInit
+    = PerformCopyInitialization(InitializedEntity::InitializeParameter(
+                                                   FD->getParamDecl(0)),
+                                SourceLocation(), Owned(SL));
+  if (InputInit.isInvalid())
+    return ExprError();
+  Args[0] = InputInit.takeAs<Expr>();
+
+  QualType ResultTy = FD->getResultType().getNonReferenceType();
+  Expr *Fn = new (Context) DeclRefExpr(FD, FD->getType(), SourceLocation());
+  UsualUnaryConversions(Fn);
+
+  UDLiteralExpr *E = new (Context) UDLiteralExpr(Context, SL, Fn, Args, 2,
+                                                     ResultTy);
+
+  if (CheckCallReturnType(FD->getResultType(), SL->getLocStart(), E, FD))
+    return ExprError();
+
+  return MaybeBindToTemporary(E);
+}
diff --git a/lib/Sema/TreeTransform.h b/lib/Sema/TreeTransform.h
index 08cfd68d66..8784c844be 100644
--- a/lib/Sema/TreeTransform.h
+++ b/lib/Sema/TreeTransform.h
@@ -6028,6 +6028,12 @@ TreeTransform<Derived>::TransformObjCEncodeExpr(ObjCEncodeExpr *E) {
                                             E->getRParenLoc());
 }
 
+template<typename Derived>
+ExprResult
+TreeTransform<Derived>::TransformUDLiteralExpr(UDLiteralExpr *E) {
+  return SemaRef.Owned(E->Retain()); // E itself is reused; nothing is rebuilt.
+}
+
 template<typename Derived>
 ExprResult
 TreeTransform<Derived>::TransformObjCMessageExpr(ObjCMessageExpr *E) {
diff --git a/test/Parser/cxx0x-literal-operators.cpp b/test/Parser/cxx0x-literal-operators.cpp
index 30b290382c..5bebb0a707 100644
--- a/test/Parser/cxx0x-literal-operators.cpp
+++ b/test/Parser/cxx0x-literal-operators.cpp
@@ -1,5 +1,5 @@
 // RUN: %clang_cc1 -fsyntax-only -verify -std=c++0x %s
 
 void operator "" (const char *); // expected-error {{expected identifier}}
-void operator "k" foo(const char *); // expected-error {{string literal after 'operator' must be '""'}}
-void operator "" tester (const char *);
+void operator "k"_foo(const char *); // expected-error {{string literal after 'operator' must be '""'}}
+void operator ""_tester (const char *);
diff --git a/test/SemaCXX/literal-operator-dcls.cpp b/test/SemaCXX/literal-operator-dcls.cpp
new file mode 100644
index 0000000000..88bf2b936f
--- /dev/null
+++ b/test/SemaCXX/literal-operator-dcls.cpp
@@ -0,0 +1,46 @@
+// RUN: %clang_cc1 -fsyntax-only -verify -std=c++0x %s
+
+#include <stddef.h>
+
+struct tag {
+  void operator "" _tag_bad (const char *); // expected-error {{literal operator 'operator "" _tag_bad' must be in a namespace or global scope}}
+  friend void operator "" _tag_good (const char *);
+};
+
+namespace ns { void operator "" _ns_good (const char *); }
+
+// Check extern "C++" declarations
+extern "C++" void operator "" _extern_good (const char *);
+extern "C++" { void operator "" _extern_good (const char *); }
+
+void fn () { void operator "" _fn_bad (const char *); } // expected-error {{literal operator 'operator "" _fn_bad' must be in a namespace or global scope}}
+
+// Warning name
+void operator "" warn (const char *); // expected-warning {{reserved for future standardization}}
+
+// One-param declarations (const char * was already checked)
+void operator "" _good (char);
+void operator "" _good (wchar_t);
+void operator "" _good (char16_t);
+void operator "" _good (char32_t);
+void operator "" _good (unsigned long long);
+void operator "" _good (long double);
+
+// Two-param declarations
+void operator "" _good (const char *, size_t);
+void operator "" _good (const wchar_t *, size_t);
+void operator "" _good (const char16_t *, size_t);
+void operator "" _good (const char32_t *, size_t);
+
+// Check typedef and array equivalences
+void operator "" _good (const char[]);
+typedef const char c;
+void operator "" _good (c*);
+
+// Check extra cv-qualifiers
+void operator "" _cv_good (volatile const char *, const size_t);
+
+// Template declaration (not implemented yet)
+// template <char...> void operator "" _good ();
+
+// FIXME: Test some invalid decls that might crop up.
diff --git a/test/SemaCXX/literal-operators.cpp b/test/SemaCXX/literal-operators.cpp
index ec585a61da..93fd4b6a14 100644
--- a/test/SemaCXX/literal-operators.cpp
+++ b/test/SemaCXX/literal-operators.cpp
@@ -2,42 +2,21 @@
 
 #include <stddef.h>
 
-struct tag {
-  void operator "" tag_bad (const char *); // expected-error {{literal operator 'operator "" tag_bad' must be in a namespace or global scope}}
-  friend void operator "" tag_good (const char *);
+template <typename T, typename U> struct same_type {
+  static const bool value = false;
 };
 
-namespace ns { void operator "" ns_good (const char *); }
-
-// Check extern "C++" declarations
-extern "C++" void operator "" extern_good (const char *);
-extern "C++" { void operator "" extern_good (const char *); }
-
-void fn () { void operator "" fn_bad (const char *); } // expected-error {{literal operator 'operator "" fn_bad' must be in a namespace or global scope}}
-
-// One-param declarations (const char * was already checked)
-void operator "" good (char);
-void operator "" good (wchar_t);
-void operator "" good (char16_t);
-void operator "" good (char32_t);
-void operator "" good (unsigned long long);
-void operator "" good (long double);
+template <typename T> struct same_type<T, T> {
+  static const bool value = true;
+};
 
-// Two-param declarations
-void operator "" good (const char *, size_t);
-void operator "" good (const wchar_t *, size_t);
-void operator "" good (const char16_t *, size_t);
-void operator "" good (const char32_t *, size_t);
+int operator "" _int (const char *, size_t);
+static_assert(same_type<int, decltype(""_int)>::value, "not the same type!");
 
-// Check typedef and array equivalences
-void operator "" good (const char[]);
-typedef const char c;
-void operator "" good (c*);
+int i = ""_int;
+int j = L""_int; // expected-error {{no matching literal operator function}}
 
-// Check extra cv-qualifiers
-void operator "" cv_good (volatile const char *, const size_t);
+int operator "" _int (const wchar_t *, size_t);
 
-// Template delcaration (not implemented yet)
-// template <char...> void operator "" good ();
+int k = L""_int;
 
-// FIXME: Test some invalid decls that might crop up.
diff --git a/tools/libclang/CXCursor.cpp b/tools/libclang/CXCursor.cpp
index 40f91a7383..b531011650 100644
--- a/tools/libclang/CXCursor.cpp
+++ b/tools/libclang/CXCursor.cpp
@@ -218,6 +218,7 @@ CXCursor cxcursor::MakeCXCursor(Stmt *S, Decl *Parent, ASTUnit *TU) {
   case Stmt::CXXMemberCallExprClass:
   case Stmt::CXXConstructExprClass:  
   case Stmt::CXXTemporaryObjectExprClass:
+  case Stmt::UDLiteralExprClass:
     // FIXME: CXXUnresolvedConstructExpr
     // FIXME: ObjCImplicitSetterGetterRefExpr?
     K = CXCursor_CallExpr;