From: Sean Hunt Date: Sun, 29 Aug 2010 21:26:48 +0000 (+0000) Subject: Implement C++0x user-defined string literals. X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=0016d519b831859526b79405cdae4c64c73731c8;p=clang Implement C++0x user-defined string literals. The extra data stored on user-defined literal Tokens is stored in extra allocated memory, which is managed by the PreprocessorLexer because there isn't a better place to put it that makes sure it gets deallocated, but only after it's used up. My testing has shown no significant slowdown as a result, but independent testing would be appreciated. git-svn-id: https://llvm.org/svn/llvm-project/cfe/trunk@112458 91177308-0d34-0410-b5e6-96231b3b80d8 --- diff --git a/include/clang/AST/ExprCXX.h b/include/clang/AST/ExprCXX.h index 450348b9d2..1529b0ca13 100644 --- a/include/clang/AST/ExprCXX.h +++ b/include/clang/AST/ExprCXX.h @@ -2422,6 +2422,50 @@ public: virtual child_iterator child_end(); }; +/// UDLiteralExpr - An expression for a user-defined +/// string literal (e.g. "foo"_bar) +/// +/// Both the DeclRefExpr and the IntegerConstant are fictional expressions +/// generated from the literal. 
+class UDLiteralExpr : public CallExpr { + Expr *BaseLiteral; + + static bool isValidLiteral(Expr *E) { + return isa(E) || isa(E) || + isa(E) || isa(E); + } +public: + UDLiteralExpr(ASTContext &C, Expr *E, Expr *fn, Expr **args, + unsigned numargs, QualType t) + : CallExpr(C, UDLiteralExprClass, fn, args, numargs, t, SourceLocation()) + , BaseLiteral(E) { + assert(isValidLiteral(E) && "Base literal must be an actual literal"); + } + + FunctionDecl *getLiteralOperator() { return getDirectCallee(); } + const FunctionDecl *getLiteralOperator() const { return getDirectCallee(); } + + Expr *getBaseLiteral() { return BaseLiteral; } + const Expr *getBaseLiteral() const { return BaseLiteral; } + void setBaseLiteral(Expr *E) { + assert(isValidLiteral(E) && "Base literal must be an actual literal"); + BaseLiteral = E; + } + + IdentifierInfo *getUDSuffix() const { + return getLiteralOperator()->getDeclName().getCXXLiteralIdentifier(); + } + + virtual SourceRange getSourceRange() const { + return getBaseLiteral()->getSourceRange(); + } + + static bool classof(const Stmt *T) { + return T->getStmtClass() == UDLiteralExprClass; + } + static bool classof(const UDLiteralExpr *) { return true; } +}; + inline ExplicitTemplateArgumentList &OverloadExpr::getExplicitTemplateArgs() { if (isa(this)) return cast(this)->getExplicitTemplateArgs(); diff --git a/include/clang/AST/RecursiveASTVisitor.h b/include/clang/AST/RecursiveASTVisitor.h index 79ac072fa1..a360cbd427 100644 --- a/include/clang/AST/RecursiveASTVisitor.h +++ b/include/clang/AST/RecursiveASTVisitor.h @@ -1694,6 +1694,7 @@ DEF_TRAVERSE_STMT(CXXTemporaryObjectExpr, { DEF_TRAVERSE_STMT(CallExpr, { }) DEF_TRAVERSE_STMT(CXXMemberCallExpr, { }) DEF_TRAVERSE_STMT(CXXOperatorCallExpr, { }) +DEF_TRAVERSE_STMT(UDLiteralExpr, { }) // These operators (all of them) do not need any action except // iterating over the children. 
diff --git a/include/clang/Basic/DiagnosticLexKinds.td b/include/clang/Basic/DiagnosticLexKinds.td index 07f53d9867..2896a2f89e 100644 --- a/include/clang/Basic/DiagnosticLexKinds.td +++ b/include/clang/Basic/DiagnosticLexKinds.td @@ -99,6 +99,9 @@ def ext_string_too_long : Extension<"string literal of length %0 exceeds " "maximum length %1 that %select{C90|ISO C99|C++}2 compilers are required to " "support">, InGroup; +def err_ud_suffix_mismatch : Error<"User-defined literal suffixes on adjacent " + "string literal tokens do not match">; + //===----------------------------------------------------------------------===// // PTH Diagnostics //===----------------------------------------------------------------------===// diff --git a/include/clang/Basic/DiagnosticSemaKinds.td b/include/clang/Basic/DiagnosticSemaKinds.td index 32550dc616..e85ce4f483 100644 --- a/include/clang/Basic/DiagnosticSemaKinds.td +++ b/include/clang/Basic/DiagnosticSemaKinds.td @@ -2946,9 +2946,15 @@ def err_operator_delete_param_type : Error< // C++ literal operators def err_literal_operator_outside_namespace : Error< "literal operator %0 must be in a namespace or global scope">; -// FIXME: This diagnostic sucks +def warn_literal_operator_no_underscore : Warning< "literal operator names not " + "beginning with underscores are reserved for future standardization">; +def err_literal_operator_overload : Error< + "no matching literal operator function for user-defined suffix '%0'">; +def err_literal_operator_deleted : Error< + "deleted literal operator function for user-defined suffix '%0'">; +// FIXME: This should really provide information about what is allowed. 
def err_literal_operator_params : Error< - "parameter declaration for literal operator %0 is not valid">; + "parameter declaration for literal operator '%0' is not valid">; // C++ conversion functions def err_conv_function_not_member : Error< diff --git a/include/clang/Basic/StmtNodes.td b/include/clang/Basic/StmtNodes.td index a2f69730a0..2b446c674a 100644 --- a/include/clang/Basic/StmtNodes.td +++ b/include/clang/Basic/StmtNodes.td @@ -112,6 +112,9 @@ def OverloadExpr : DStmt; def UnresolvedLookupExpr : DStmt; def UnresolvedMemberExpr : DStmt; +// C++0x expressions +def UDLiteralExpr : DStmt; + // Obj-C Expressions. def ObjCStringLiteral : DStmt; def ObjCEncodeExpr : DStmt; diff --git a/include/clang/Lex/Lexer.h b/include/clang/Lex/Lexer.h index 9e0fb7ee70..e05113da76 100644 --- a/include/clang/Lex/Lexer.h +++ b/include/clang/Lex/Lexer.h @@ -17,6 +17,7 @@ #include "clang/Lex/PreprocessorLexer.h" #include "clang/Basic/LangOptions.h" #include "llvm/ADT/SmallVector.h" +#include "llvm/Support/Allocator.h" #include #include #include @@ -67,6 +68,9 @@ class Lexer : public PreprocessorLexer { // line" flag set on it. bool IsAtStartOfLine; + // ExtraDataAllocator - An allocator for extra data on a token. 
+ llvm::BumpPtrAllocator ExtraDataAllocator; + Lexer(const Lexer&); // DO NOT IMPLEMENT void operator=(const Lexer&); // DO NOT IMPLEMENT friend class Preprocessor; diff --git a/include/clang/Lex/LiteralSupport.h b/include/clang/Lex/LiteralSupport.h index ba46fb18a6..40112f5348 100644 --- a/include/clang/Lex/LiteralSupport.h +++ b/include/clang/Lex/LiteralSupport.h @@ -27,6 +27,7 @@ class Preprocessor; class Token; class SourceLocation; class TargetInfo; +class IdentifierInfo; /// NumericLiteralParser - This performs strict semantic analysis of the content /// of a ppnumber, classifying it as either integer, floating, or erroneous, @@ -145,6 +146,7 @@ class StringLiteralParser { unsigned wchar_tByteWidth; llvm::SmallString<512> ResultBuf; char *ResultPtr; // cursor + IdentifierInfo *UDSuffix; public: StringLiteralParser(const Token *StringToks, unsigned NumStringToks, Preprocessor &PP, bool Complain = true); @@ -155,6 +157,9 @@ public: const char *GetString() { return &ResultBuf[0]; } unsigned GetStringLength() const { return ResultPtr-&ResultBuf[0]; } + bool isUserDefinedLiteral() const { return UDSuffix; } + IdentifierInfo *getUDSuffix() const { return UDSuffix; } + unsigned GetNumStringChars() const { if (AnyWide) return GetStringLength() / wchar_tByteWidth; diff --git a/include/clang/Lex/Preprocessor.h b/include/clang/Lex/Preprocessor.h index 6b9b89ea5e..94e858dfb0 100644 --- a/include/clang/Lex/Preprocessor.h +++ b/include/clang/Lex/Preprocessor.h @@ -645,7 +645,7 @@ public: /// copy). The caller is not allowed to modify the returned buffer pointer /// if an internal buffer is returned. unsigned getSpelling(const Token &Tok, const char *&Buffer, - bool *Invalid = 0) const; + bool *Invalid = 0, bool LiteralOnly = false) const; /// getSpelling - This method is used to get the spelling of a token into a /// SmallVector. 
Note that the returned StringRef may not point to the diff --git a/include/clang/Lex/Token.h b/include/clang/Lex/Token.h index bd9b46869a..e6f7c7c3f5 100644 --- a/include/clang/Lex/Token.h +++ b/include/clang/Lex/Token.h @@ -14,16 +14,16 @@ #ifndef LLVM_CLANG_TOKEN_H #define LLVM_CLANG_TOKEN_H +#include "llvm/Support/Allocator.h" #include "clang/Basic/TemplateKinds.h" #include "clang/Basic/TokenKinds.h" #include "clang/Basic/SourceLocation.h" #include "clang/Basic/OperatorKinds.h" +#include "clang/Basic/IdentifierTable.h" #include namespace clang { -class IdentifierInfo; - /// Token - This structure provides full information about a lexed token. /// It is not intended to be space efficient, it is intended to return as much /// information as possible about each returned token. This is expected to be @@ -34,6 +34,14 @@ class IdentifierInfo; /// can be represented by a single typename annotation token that carries /// information about the SourceRange of the tokens and the type object. class Token { + /// An extra-large structure for storing the data needed for a user-defined + /// literal - the raw literal, and the identifier suffix. + struct UDLData { + IdentifierInfo *II; + const char *LiteralData; + unsigned LiteralLength; + }; + /// The location of the token. SourceLocation Loc; @@ -47,7 +55,7 @@ class Token { /// token. unsigned UintData; - /// PtrData - This is a union of four different pointer types, which depends + /// PtrData - This is a union of five different pointer types, which depends /// on what type of token this is: /// Identifiers, keywords, etc: /// This is an IdentifierInfo*, which contains the uniqued identifier @@ -55,6 +63,8 @@ class Token { /// Literals: isLiteral() returns true. /// This is a pointer to the start of the token in a text buffer, which /// may be dirty (have trigraphs / escaped newlines). + /// User-defined literals: isUserDefinedLiteral() returns true. + /// This is a pointer to a UDLData. 
/// Annotations (resolved type names, C++ scopes, etc): isAnnotation(). /// This is a pointer to sema-specific data for the annotation token. /// Other: @@ -71,12 +81,14 @@ class Token { unsigned char Flags; public: - // Various flags set per token: + /// Various flags set per token: enum TokenFlags { - StartOfLine = 0x01, // At start of line or only after whitespace. - LeadingSpace = 0x02, // Whitespace exists before this token. - DisableExpand = 0x04, // This identifier may never be macro expanded. - NeedsCleaning = 0x08 // Contained an escaped newline or trigraph. + StartOfLine = 0x01, ///< At start of line or only after whitespace + LeadingSpace = 0x02, ///< Whitespace exists before this token + DisableExpand = 0x04, ///< This identifier may never be macro expanded + NeedsCleaning = 0x08, ///< Contained an escaped newline or trigraph + UserDefinedLiteral = 0x10, ///< This literal has a ud-suffix + LiteralPortionClean = 0x20 ///< A UDL's literal portion needs no cleaning }; tok::TokenKind getKind() const { return (tok::TokenKind)Kind; } @@ -108,12 +120,34 @@ public: assert(!isAnnotation() && "Annotation tokens have no length field"); return UintData; } + /// getLiteralLength - Return the length of the literal portion of the token, + /// which may not be the token length if this is a user-defined literal. 
+ unsigned getLiteralLength() const { + assert(isLiteral() && "Using getLiteralLength on a non-literal token"); + if (isUserDefinedLiteral()) + return reinterpret_cast(PtrData)->LiteralLength; + else + return UintData; + } void setLocation(SourceLocation L) { Loc = L; } void setLength(unsigned Len) { assert(!isAnnotation() && "Annotation tokens have no length field"); UintData = Len; } + void setLiteralLength(unsigned Len) { + assert(isLiteral() && "Using setLiteralLength on a non-literal token"); + if (isUserDefinedLiteral()) + reinterpret_cast(PtrData)->LiteralLength = Len; + else + UintData = Len; + } + + /// makeUserDefinedLiteral - Set this token to be a user-defined literal + void makeUserDefinedLiteral(llvm::BumpPtrAllocator &Alloc) { + PtrData = Alloc.Allocate(sizeof(UDLData), 4); + setFlag(UserDefinedLiteral); + } SourceLocation getAnnotationEndLoc() const { assert(isAnnotation() && "Used AnnotEndLocID on non-annotation token"); @@ -154,11 +188,18 @@ public: IdentifierInfo *getIdentifierInfo() const { assert(!isAnnotation() && "Used IdentInfo on annotation token!"); - if (isLiteral()) return 0; - return (IdentifierInfo*) PtrData; + if (isUserDefinedLiteral()) + return reinterpret_cast(PtrData)->II; + else if (isLiteral()) + return 0; + else + return reinterpret_cast(PtrData); } void setIdentifierInfo(IdentifierInfo *II) { - PtrData = (void*) II; + if (isUserDefinedLiteral()) + reinterpret_cast(PtrData)->II = II; + else + PtrData = (void*)II; } /// getLiteralData - For a literal token (numeric constant, string, etc), this @@ -166,11 +207,17 @@ public: /// otherwise. 
const char *getLiteralData() const { assert(isLiteral() && "Cannot get literal data of non-literal"); - return reinterpret_cast(PtrData); + if (isUserDefinedLiteral()) + return reinterpret_cast(PtrData)->LiteralData; + else + return reinterpret_cast(PtrData); } void setLiteralData(const char *Ptr) { assert(isLiteral() && "Cannot set literal data of non-literal"); - PtrData = const_cast(Ptr); + if (isUserDefinedLiteral()) + reinterpret_cast(PtrData)->LiteralData = Ptr; + else + PtrData = const_cast(Ptr); } void *getAnnotationValue() const { @@ -221,6 +268,12 @@ public: return (Flags & DisableExpand) ? true : false; } + /// isUserDefinedLiteral - Return true if this is a C++0x user-defined literal + /// token. + bool isUserDefinedLiteral() const { + return (Flags & UserDefinedLiteral) ? true : false; + } + /// isObjCAtKeyword - Return true if we have an ObjC keyword identifier. bool isObjCAtKeyword(tok::ObjCKeywordKind objcKey) const; @@ -229,8 +282,17 @@ public: /// needsCleaning - Return true if this token has trigraphs or escaped /// newlines in it. - /// - bool needsCleaning() const { return (Flags & NeedsCleaning) ? true : false; } + bool needsCleaning() const { + return (Flags & NeedsCleaning) ? true : false; + } + + /// literalNeedsCleaning - Return true if the literal portion of this token + /// needs cleaning. + bool literalNeedsCleaning() const { + assert(isLiteral() && "Using literalNeedsCleaning on a non-literal token"); + return (Flags & NeedsCleaning) ? ((Flags & LiteralPortionClean) ? false : true) + : false; + } }; /// PPConditionalInfo - Information about the conditional stack (#if directives) diff --git a/include/clang/Sema/Sema.h b/include/clang/Sema/Sema.h index 5558eae4da..d8c8b02499 100644 --- a/include/clang/Sema/Sema.h +++ b/include/clang/Sema/Sema.h @@ -1752,8 +1752,8 @@ public: /// ActOnStringLiteral - The specified tokens were lexed as pasted string /// fragments (e.g. "foo" "bar" L"baz"). 
- virtual ExprResult ActOnStringLiteral(const Token *Toks, - unsigned NumToks); + virtual ExprResult ActOnStringLiteral(Scope *S, const Token *Toks, + unsigned NumToks); // Binary/Unary Operators. 'Tok' is the token for the operator. ExprResult CreateBuiltinUnaryOp(SourceLocation OpLoc, @@ -2747,6 +2747,9 @@ public: bool CheckLiteralOperatorDeclaration(FunctionDecl *FnDecl); + ExprResult BuildUDStringLiteralExpr(Scope *S, StringLiteral *SL, unsigned L, + IdentifierInfo *II); + //===--------------------------------------------------------------------===// // C++ Templates [C++ 14] // diff --git a/lib/AST/ExprConstant.cpp b/lib/AST/ExprConstant.cpp index 14cbbaf0e8..d067668f4b 100644 --- a/lib/AST/ExprConstant.cpp +++ b/lib/AST/ExprConstant.cpp @@ -2423,7 +2423,8 @@ static ICEDiag CheckICE(const Expr* E, ASTContext &Ctx) { case Expr::UnaryTypeTraitExprClass: return NoDiag(); case Expr::CallExprClass: - case Expr::CXXOperatorCallExprClass: { + case Expr::CXXOperatorCallExprClass: + case Expr::UDLiteralExprClass: { const CallExpr *CE = cast(E); if (CE->isBuiltinCall(Ctx)) return CheckEvalInICE(E, Ctx); diff --git a/lib/AST/StmtPrinter.cpp b/lib/AST/StmtPrinter.cpp index ea528c2dae..084bdb3ed6 100644 --- a/lib/AST/StmtPrinter.cpp +++ b/lib/AST/StmtPrinter.cpp @@ -1199,6 +1199,11 @@ void StmtPrinter::VisitUnresolvedMemberExpr(UnresolvedMemberExpr *Node) { } } +void StmtPrinter::VisitUDLiteralExpr(UDLiteralExpr *Node) { + VisitStmt(Node->getBaseLiteral()); + OS << Node->getUDSuffix()->getName(); +} + static const char *getTypeTraitName(UnaryTypeTrait UTT) { switch (UTT) { default: assert(false && "Unknown type trait"); diff --git a/lib/AST/StmtProfile.cpp b/lib/AST/StmtProfile.cpp index 098aec0a19..79454748dd 100644 --- a/lib/AST/StmtProfile.cpp +++ b/lib/AST/StmtProfile.cpp @@ -828,6 +828,12 @@ void StmtProfiler::VisitUnresolvedMemberExpr(UnresolvedMemberExpr *S) { VisitTemplateArguments(S->getTemplateArgs(), S->getNumTemplateArgs()); } +void 
StmtProfiler::VisitUDLiteralExpr(UDLiteralExpr *S) { + VisitExpr(S); + VisitStmt(S->getBaseLiteral()); + ID.AddString(S->getUDSuffix()->getName()); +} + void StmtProfiler::VisitObjCStringLiteral(ObjCStringLiteral *S) { VisitExpr(S); } diff --git a/lib/Checker/GRExprEngine.cpp b/lib/Checker/GRExprEngine.cpp index c9173aa92a..059749d16f 100644 --- a/lib/Checker/GRExprEngine.cpp +++ b/lib/Checker/GRExprEngine.cpp @@ -886,6 +886,7 @@ void GRExprEngine::Visit(const Stmt* S, ExplodedNode* Pred, } case Stmt::CallExprClass: + case Stmt::UDLiteralExprClass: case Stmt::CXXOperatorCallExprClass: { const CallExpr* C = cast(S); VisitCall(C, Pred, C->arg_begin(), C->arg_end(), Dst, false); diff --git a/lib/CodeGen/Mangle.cpp b/lib/CodeGen/Mangle.cpp index c06b4fc699..51b07a1327 100644 --- a/lib/CodeGen/Mangle.cpp +++ b/lib/CodeGen/Mangle.cpp @@ -1634,6 +1634,7 @@ void CXXNameMangler::mangleExpression(const Expr *E, unsigned Arity) { break; case Expr::CXXMemberCallExprClass: // fallthrough + case Expr::UDLiteralExprClass: case Expr::CallExprClass: { const CallExpr *CE = cast(E); Out << "cl"; diff --git a/lib/Lex/Lexer.cpp b/lib/Lex/Lexer.cpp index 6cd1873e28..b4cafb49f8 100644 --- a/lib/Lex/Lexer.cpp +++ b/lib/Lex/Lexer.cpp @@ -548,6 +548,11 @@ static void InitCharacterInfo() { isInited = true; } +/// isIdentifierStart - Return true if this is the start character of an +/// identifier, which is [a-zA-Z_]. +static inline bool isIdentifierStart(unsigned char c) { + return (CharInfo[c] & (CHAR_LETTER|CHAR_UNDER)) ? true : false; +} /// isIdentifierBody - Return true if this is the body character of an /// identifier, which is [a-zA-Z0-9_]. @@ -982,8 +987,30 @@ void Lexer::LexStringLiteral(Token &Result, const char *CurPtr, bool Wide) { // Update the location of the token as well as the BufferPtr instance var. const char *TokStart = BufferPtr; - FormTokenWithChars(Result, CurPtr, - Wide ? tok::wide_string_literal : tok::string_literal); + tok::TokenKind Kind = Wide ? 
tok::wide_string_literal : tok::string_literal; + + // FIXME: Handle UCNs + unsigned Size; + if (PP && PP->getLangOptions().CPlusPlus0x && + isIdentifierStart(getCharAndSize(CurPtr, Size))) { + Result.makeUserDefinedLiteral(ExtraDataAllocator); + Result.setFlagValue(Token::LiteralPortionClean, !Result.needsCleaning()); + Result.setKind(Kind); + Result.setLiteralLength(CurPtr - BufferPtr); + + // FIXME: We hack around the lexer's routines a lot here. + BufferPtr = CurPtr; + bool OldRawMode = LexingRawMode; + LexingRawMode = true; + LexIdentifier(Result, ConsumeChar(CurPtr, Size, Result)); + LexingRawMode = OldRawMode; + PP->LookUpIdentifierInfo(Result, CurPtr); + + CurPtr = BufferPtr; + BufferPtr = TokStart; + } + + FormTokenWithChars(Result, CurPtr, Kind); Result.setLiteralData(TokStart); } diff --git a/lib/Lex/LiteralSupport.cpp b/lib/Lex/LiteralSupport.cpp index a12c4ae0d4..eb7337a565 100644 --- a/lib/Lex/LiteralSupport.cpp +++ b/lib/Lex/LiteralSupport.cpp @@ -758,30 +758,38 @@ CharLiteralParser::CharLiteralParser(const char *begin, const char *end, /// StringLiteralParser:: StringLiteralParser(const Token *StringToks, unsigned NumStringToks, - Preprocessor &pp, bool Complain) : PP(pp) { + Preprocessor &pp, bool Complain) : PP(pp), hadError(false) { // Scan all of the string portions, remember the max individual token length, // computing a bound on the concatenated string length, and see whether any // piece is a wide-string. If any of the string portions is a wide-string // literal, the result is a wide-string literal [C99 6.4.5p4]. - MaxTokenLength = StringToks[0].getLength(); - SizeBound = StringToks[0].getLength()-2; // -2 for "". + MaxTokenLength = StringToks[0].getLiteralLength(); + SizeBound = StringToks[0].getLiteralLength()-2; // -2 for "". AnyWide = StringToks[0].is(tok::wide_string_literal); - - hadError = false; + UDSuffix = StringToks[0].getIdentifierInfo(); // Implement Translation Phase #6: concatenation of string literals /// (C99 5.1.1.2p1). 
The common case is only one string fragment. for (unsigned i = 1; i != NumStringToks; ++i) { // The string could be shorter than this if it needs cleaning, but this is a // reasonable bound, which is all we need. - SizeBound += StringToks[i].getLength()-2; // -2 for "". + SizeBound += StringToks[i].getLiteralLength()-2; // -2 for "". // Remember maximum string piece length. - if (StringToks[i].getLength() > MaxTokenLength) - MaxTokenLength = StringToks[i].getLength(); + if (StringToks[i].getLiteralLength() > MaxTokenLength) + MaxTokenLength = StringToks[i].getLiteralLength(); // Remember if we see any wide strings. AnyWide |= StringToks[i].is(tok::wide_string_literal); + + if (StringToks[i].isUserDefinedLiteral()) { + if (UDSuffix && UDSuffix != StringToks[i].getIdentifierInfo()) { + // FIXME: Improve location and note previous + PP.Diag(StringToks[0].getLocation(), diag::err_ud_suffix_mismatch); + hadError = true; + } else if (!UDSuffix) + UDSuffix = StringToks[0].getIdentifierInfo(); + } } // Include space for the null terminator. @@ -823,7 +831,7 @@ StringLiteralParser(const Token *StringToks, unsigned NumStringToks, // and 'spelled' tokens can only shrink. bool StringInvalid = false; unsigned ThisTokLen = PP.getSpelling(StringToks[i], ThisTokBuf, - &StringInvalid); + &StringInvalid, true); if (StringInvalid) { hadError = 1; continue; @@ -938,7 +946,7 @@ unsigned StringLiteralParser::getOffsetOfStringByte(const Token &Tok, bool Complain) { // Get the spelling of the token. 
llvm::SmallString<16> SpellingBuffer; - SpellingBuffer.resize(Tok.getLength()); + SpellingBuffer.resize(Tok.getLiteralLength()); bool StringInvalid = false; const char *SpellingPtr = &SpellingBuffer[0]; diff --git a/lib/Lex/Preprocessor.cpp b/lib/Lex/Preprocessor.cpp index 5160acf19e..f52d35494a 100644 --- a/lib/Lex/Preprocessor.cpp +++ b/lib/Lex/Preprocessor.cpp @@ -352,15 +352,25 @@ std::string Preprocessor::getSpelling(const Token &Tok, bool *Invalid) const { /// to point to a constant buffer with the data already in it (avoiding a /// copy). The caller is not allowed to modify the returned buffer pointer /// if an internal buffer is returned. -unsigned Preprocessor::getSpelling(const Token &Tok, - const char *&Buffer, bool *Invalid) const { +/// +/// If LiteralOnly is specified, only the literal portion of the token is +/// processed. +unsigned Preprocessor::getSpelling(const Token &Tok, const char *&Buffer, + bool *Invalid, bool LiteralOnly) const { assert((int)Tok.getLength() >= 0 && "Token character range is bogus!"); + assert((!LiteralOnly || Tok.isLiteral()) && + "LiteralOnly used on a non-literal token"); + + unsigned (Token::*getLength) () const = + LiteralOnly ? &Token::getLiteralLength : &Token::getLength; // If this token is an identifier, just return the string from the identifier // table, which is very quick. if (const IdentifierInfo *II = Tok.getIdentifierInfo()) { - Buffer = II->getNameStart(); - return II->getLength(); + if (!Tok.isUserDefinedLiteral()) { + Buffer = II->getNameStart(); + return II->getLength(); + } } // Otherwise, compute the start of the token in the input lexer buffer. @@ -381,20 +391,20 @@ unsigned Preprocessor::getSpelling(const Token &Tok, } // If this token contains nothing interesting, return it directly. - if (!Tok.needsCleaning()) { + if (!(LiteralOnly ? 
Tok.literalNeedsCleaning() : Tok.needsCleaning())) { Buffer = TokStart; - return Tok.getLength(); + return (Tok.*getLength)(); } // Otherwise, hard case, relex the characters into the string. char *OutBuf = const_cast(Buffer); - for (const char *Ptr = TokStart, *End = TokStart+Tok.getLength(); + for (const char *Ptr = TokStart, *End = TokStart+(Tok.*getLength)(); Ptr != End; ) { unsigned CharSize; *OutBuf++ = Lexer::getCharAndSizeNoWarn(Ptr, CharSize, Features); Ptr += CharSize; } - assert(unsigned(OutBuf-Buffer) != Tok.getLength() && + assert(unsigned(OutBuf-Buffer) != (Tok.*getLength)() && "NeedsCleaning flag set on something that didn't need cleaning!"); return OutBuf-Buffer; diff --git a/lib/Parse/ParseExpr.cpp b/lib/Parse/ParseExpr.cpp index 290b72c4c0..2ade77a907 100644 --- a/lib/Parse/ParseExpr.cpp +++ b/lib/Parse/ParseExpr.cpp @@ -1568,7 +1568,8 @@ ExprResult Parser::ParseStringLiteralExpression() { } while (isTokenStringLiteral()); // Pass the set of string tokens, ready for concatenation, to the actions. - return Actions.ActOnStringLiteral(&StringToks[0], StringToks.size()); + return Actions.ActOnStringLiteral(getCurScope(), &StringToks[0], + StringToks.size()); } /// ParseExpressionList - Used for C/C++ (argument-)expression-list. 
diff --git a/lib/Parse/ParseStmt.cpp b/lib/Parse/ParseStmt.cpp index af927285a4..94b8c3baac 100644 --- a/lib/Parse/ParseStmt.cpp +++ b/lib/Parse/ParseStmt.cpp @@ -1244,11 +1244,12 @@ StmtResult Parser::FuzzyParseMicrosoftAsmStatement() { Tok.isNot(tok::eof)); } Token t; + t.startToken(); t.setKind(tok::string_literal); t.setLiteralData("\"/*FIXME: not done*/\""); t.clearFlag(Token::NeedsCleaning); t.setLength(21); - ExprResult AsmString(Actions.ActOnStringLiteral(&t, 1)); + ExprResult AsmString(Actions.ActOnStringLiteral(getCurScope(), &t, 1)); ExprVector Constraints(Actions); ExprVector Exprs(Actions); ExprVector Clobbers(Actions); diff --git a/lib/Sema/SemaDeclCXX.cpp b/lib/Sema/SemaDeclCXX.cpp index 35920c2792..4abeaaf246 100644 --- a/lib/Sema/SemaDeclCXX.cpp +++ b/lib/Sema/SemaDeclCXX.cpp @@ -5932,6 +5932,9 @@ bool Sema::CheckLiteralOperatorDeclaration(FunctionDecl *FnDecl) { return true; } + if (FnDecl->getDeclName().getCXXLiteralIdentifier()->getName()[0] != '_') + Diag(FnDecl->getLocation(), diag::warn_literal_operator_no_underscore); + bool Valid = false; // template type operator "" name() is the only valid template diff --git a/lib/Sema/SemaExpr.cpp b/lib/Sema/SemaExpr.cpp index 1a065eb63c..4d0ac90d39 100644 --- a/lib/Sema/SemaExpr.cpp +++ b/lib/Sema/SemaExpr.cpp @@ -375,7 +375,8 @@ QualType Sema::UsualArithmeticConversions(Expr *&lhsExpr, Expr *&rhsExpr, /// string. 
/// ExprResult -Sema::ActOnStringLiteral(const Token *StringToks, unsigned NumStringToks) { +Sema::ActOnStringLiteral(Scope *S, const Token *StringToks, + unsigned NumStringToks) { assert(NumStringToks && "Must have at least one string!"); StringLiteralParser Literal(StringToks, NumStringToks, PP); @@ -401,12 +402,18 @@ Sema::ActOnStringLiteral(const Token *StringToks, unsigned NumStringToks) { llvm::APInt(32, Literal.GetNumStringChars()+1), ArrayType::Normal, 0); + StringLiteral *SL = StringLiteral::Create(Context, Literal.GetString(), + Literal.GetStringLength(), + Literal.AnyWide, StrTy, + &StringTokLocs[0], + StringTokLocs.size()); + + if (Literal.isUserDefinedLiteral()) + return BuildUDStringLiteralExpr(S, SL, Literal.GetNumStringChars(), + Literal.getUDSuffix()); + // Pass &StringTokLocs[0], StringTokLocs.size() to factory! - return Owned(StringLiteral::Create(Context, Literal.GetString(), - Literal.GetStringLength(), - Literal.AnyWide, StrTy, - &StringTokLocs[0], - StringTokLocs.size())); + return Owned(SL); } /// ShouldSnapshotBlockValueReference - Return true if a reference inside of diff --git a/lib/Sema/SemaExprCXX.cpp b/lib/Sema/SemaExprCXX.cpp index 5720d931b6..344196cc6f 100644 --- a/lib/Sema/SemaExprCXX.cpp +++ b/lib/Sema/SemaExprCXX.cpp @@ -3057,3 +3057,61 @@ ExprResult Sema::ActOnFinishFullExpr(Expr *FullExpr) { if (!FullExpr) return ExprError(); return MaybeCreateCXXExprWithTemporaries(FullExpr); } + +ExprResult Sema::BuildUDStringLiteralExpr(Scope *S, StringLiteral *SL, + unsigned L, IdentifierInfo *II) { + DeclarationName DN = Context.DeclarationNames.getCXXLiteralOperatorName(II); + + LookupResult R(*this, DN, SL->getLocStart(), LookupOrdinaryName); + LookupName(R, S); + + llvm::APInt APL(Context.getTypeSize(Context.getSizeType()), L); + + Expr *Args[2]; + Args[0] = SL; + Args[1] = new (Context) IntegerLiteral(Context, APL, Context.getSizeType(), + SourceLocation()); + + OverloadCandidateSet CandidateSet(SL->getLocStart()); + 
AddFunctionCandidates(R.asUnresolvedSet(), Args, 2, CandidateSet); + OverloadCandidateSet::iterator Best; + switch (CandidateSet.BestViableFunction(*this, SL->getLocStart(), Best)) { + case OR_Ambiguous: + llvm_unreachable("UD literals should not have ambiguous overloads"); + return ExprError(); + case OR_No_Viable_Function: + Diag(SL->getLocStart(), diag::err_literal_operator_overload) + << SL->getSourceRange() << II->getName(); + return ExprError(); + case OR_Deleted: + Diag(SL->getLocStart(), diag::err_literal_operator_deleted) + << SL->getSourceRange() << II->getName(); + //FIXME: Note the deleted function + return ExprError(); + case OR_Success: + break; + } + + assert(Best->Function && "Literal operator function not a real function"); + FunctionDecl *FD = Best->Function; + + ExprResult InputInit + = PerformCopyInitialization(InitializedEntity::InitializeParameter( + FD->getParamDecl(0)), + SourceLocation(), Owned(SL)); + if (InputInit.isInvalid()) + return ExprError(); + Args[0] = InputInit.takeAs(); + + QualType ResultTy = FD->getResultType().getNonReferenceType(); + Expr *Fn = new (Context) DeclRefExpr(FD, FD->getType(), SourceLocation()); + UsualUnaryConversions(Fn); + + UDLiteralExpr *E = new (Context) UDLiteralExpr(Context, SL, Fn, Args, 2, + ResultTy); + + if (CheckCallReturnType(FD->getResultType(), SL->getLocStart(), E, FD)) + return ExprError(); + + return MaybeBindToTemporary(E); +} diff --git a/lib/Sema/TreeTransform.h b/lib/Sema/TreeTransform.h index 08cfd68d66..8784c844be 100644 --- a/lib/Sema/TreeTransform.h +++ b/lib/Sema/TreeTransform.h @@ -6028,6 +6028,12 @@ TreeTransform::TransformObjCEncodeExpr(ObjCEncodeExpr *E) { E->getRParenLoc()); } +template +ExprResult +TreeTransform::TransformUDLiteralExpr(UDLiteralExpr *E) { + return SemaRef.Owned(E->Retain()); +} + template ExprResult TreeTransform::TransformObjCMessageExpr(ObjCMessageExpr *E) { diff --git a/test/Parser/cxx0x-literal-operators.cpp b/test/Parser/cxx0x-literal-operators.cpp index 
30b290382c..5bebb0a707 100644 --- a/test/Parser/cxx0x-literal-operators.cpp +++ b/test/Parser/cxx0x-literal-operators.cpp @@ -1,5 +1,5 @@ // RUN: %clang_cc1 -fsyntax-only -verify -std=c++0x %s void operator "" (const char *); // expected-error {{expected identifier}} -void operator "k" foo(const char *); // expected-error {{string literal after 'operator' must be '""'}} -void operator "" tester (const char *); +void operator "k"_foo(const char *); // expected-error {{string literal after 'operator' must be '""'}} +void operator ""_tester (const char *); diff --git a/test/SemaCXX/literal-operator-dcls.cpp b/test/SemaCXX/literal-operator-dcls.cpp new file mode 100644 index 0000000000..88bf2b936f --- /dev/null +++ b/test/SemaCXX/literal-operator-dcls.cpp @@ -0,0 +1,46 @@ +// RUN: %clang_cc1 -fsyntax-only -verify -std=c++0x %s + +#include + +struct tag { + void operator "" _tag_bad (const char *); // expected-error {{literal operator 'operator "" _tag_bad' must be in a namespace or global scope}} + friend void operator "" _tag_good (const char *); +}; + +namespace ns { void operator "" _ns_good (const char *); } + +// Check extern "C++" declarations +extern "C++" void operator "" _extern_good (const char *); +extern "C++" { void operator "" _extern_good (const char *); } + +void fn () { void operator "" _fn_bad (const char *); } // expected-error {{literal operator 'operator "" _fn_bad' must be in a namespace or global scope}} + +// Warning name +void operator "" warn (const char *); // expected-warning {{reserved for future standardization}} + +// One-param declarations (const char * was already checked) +void operator "" _good (char); +void operator "" _good (wchar_t); +void operator "" _good (char16_t); +void operator "" _good (char32_t); +void operator "" _good (unsigned long long); +void operator "" _good (long double); + +// Two-param declarations +void operator "" _good (const char *, size_t); +void operator "" _good (const wchar_t *, size_t); +void operator "" 
_good (const char16_t *, size_t); +void operator "" _good (const char32_t *, size_t); + +// Check typedef and array equivalences +void operator "" _good (const char[]); +typedef const char c; +void operator "" _good (c*); + +// Check extra cv-qualifiers +void operator "" _cv_good (volatile const char *, const size_t); + +// Template declaration (not implemented yet) +// template void operator "" good (); + +// FIXME: Test some invalid decls that might crop up. diff --git a/test/SemaCXX/literal-operators.cpp b/test/SemaCXX/literal-operators.cpp index ec585a61da..93fd4b6a14 100644 --- a/test/SemaCXX/literal-operators.cpp +++ b/test/SemaCXX/literal-operators.cpp @@ -2,42 +2,21 @@ #include -struct tag { - void operator "" tag_bad (const char *); // expected-error {{literal operator 'operator "" tag_bad' must be in a namespace or global scope}} - friend void operator "" tag_good (const char *); +template struct same_type { + static const bool value = false; }; -namespace ns { void operator "" ns_good (const char *); } - -// Check extern "C++" declarations -extern "C++" void operator "" extern_good (const char *); -extern "C++" { void operator "" extern_good (const char *); } - -void fn () { void operator "" fn_bad (const char *); } // expected-error {{literal operator 'operator "" fn_bad' must be in a namespace or global scope}} - -// One-param declarations (const char * was already checked) -void operator "" good (char); -void operator "" good (wchar_t); -void operator "" good (char16_t); -void operator "" good (char32_t); -void operator "" good (unsigned long long); -void operator "" good (long double); +template struct same_type { + static const bool value = true; +}; -// Two-param declarations -void operator "" good (const char *, size_t); -void operator "" good (const wchar_t *, size_t); -void operator "" good (const char16_t *, size_t); -void operator "" good (const char32_t *, size_t); +int operator "" _int (const char *, size_t); +static_assert(same_type::value, 
"not the same type!"); -// Check typedef and array equivalences -void operator "" good (const char[]); -typedef const char c; -void operator "" good (c*); +int i = ""_int; +int j = L""_int; // expected-error {{no matching literal operator function}} -// Check extra cv-qualifiers -void operator "" cv_good (volatile const char *, const size_t); +int operator "" _int (const wchar_t *, size_t); -// Template delcaration (not implemented yet) -// template void operator "" good (); +int k = L""_int; -// FIXME: Test some invalid decls that might crop up. diff --git a/tools/libclang/CXCursor.cpp b/tools/libclang/CXCursor.cpp index 40f91a7383..b531011650 100644 --- a/tools/libclang/CXCursor.cpp +++ b/tools/libclang/CXCursor.cpp @@ -218,6 +218,7 @@ CXCursor cxcursor::MakeCXCursor(Stmt *S, Decl *Parent, ASTUnit *TU) { case Stmt::CXXMemberCallExprClass: case Stmt::CXXConstructExprClass: case Stmt::CXXTemporaryObjectExprClass: + case Stmt::UDLiteralExprClass: // FIXME: CXXUnresolvedConstructExpr // FIXME: ObjCImplicitSetterGetterRefExpr? K = CXCursor_CallExpr;