From: Chris Lattner Date: Wed, 17 Nov 2010 07:26:20 +0000 (+0000) Subject: move getSpelling from Preprocessor to Lexer, which it is more conceptually related to. X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=b0607279cb98bbf2bbfe0db170aed39ef91e86a2;p=clang move getSpelling from Preprocessor to Lexer, which it is more conceptually related to. git-svn-id: https://llvm.org/svn/llvm-project/cfe/trunk@119479 91177308-0d34-0410-b5e6-96231b3b80d8 --- diff --git a/include/clang/Lex/Lexer.h b/include/clang/Lex/Lexer.h index 0237969a2d..fc9a8de434 100644 --- a/include/clang/Lex/Lexer.h +++ b/include/clang/Lex/Lexer.h @@ -211,6 +211,32 @@ public: /// and " characters. This does not add surrounding ""'s to the string. static void Stringify(llvm::SmallVectorImpl<char> &Str); + + /// getSpelling - This method is used to get the spelling of a token into a + /// preallocated buffer, instead of as an std::string. The caller is required + /// to allocate enough space for the token, which is guaranteed to be at least + /// Tok.getLength() bytes long. The length of the actual result is returned. + /// + /// Note that this method may do two possible things: it may either fill in + /// the buffer specified with characters, or it may *change the input pointer* + /// to point to a constant buffer with the data already in it (avoiding a + /// copy). The caller is not allowed to modify the returned buffer pointer + /// if an internal buffer is returned. + static unsigned getSpelling(const Token &Tok, const char *&Buffer, + const SourceManager &SourceMgr, + const LangOptions &Features, + bool *Invalid = 0); + + /// getSpelling() - Return the 'spelling' of the Tok token. The spelling of a + /// token is the characters used to represent the token in the source file + /// after trigraph expansion and escaped-newline folding. In particular, this + /// wants to get the true, uncanonicalized, spelling of things like digraphs + /// UCNs, etc. 
+ static std::string getSpelling(const Token &Tok, + const SourceManager &SourceMgr, + const LangOptions &Features, + bool *Invalid = 0); + /// MeasureTokenLength - Relex the token at the specified location and return /// its length in bytes in the input file. If the token needs cleaning (e.g. /// includes a trigraph or an escaped newline) then this count includes bytes diff --git a/include/clang/Lex/Preprocessor.h b/include/clang/Lex/Preprocessor.h index 174234357b..a493c7fbbf 100644 --- a/include/clang/Lex/Preprocessor.h +++ b/include/clang/Lex/Preprocessor.h @@ -641,17 +641,9 @@ public: /// UCNs, etc. /// /// \param Invalid If non-NULL, will be set \c true if an error occurs. - std::string getSpelling(const Token &Tok, bool *Invalid = 0) const; - - /// getSpelling() - Return the 'spelling' of the Tok token. The spelling of a - /// token is the characters used to represent the token in the source file - /// after trigraph expansion and escaped-newline folding. In particular, this - /// wants to get the true, uncanonicalized, spelling of things like digraphs - /// UCNs, etc. - static std::string getSpelling(const Token &Tok, - const SourceManager &SourceMgr, - const LangOptions &Features, - bool *Invalid = 0); + std::string getSpelling(const Token &Tok, bool *Invalid = 0) const { + return Lexer::getSpelling(Tok, SourceMgr, Features, Invalid); + } /// getSpelling - This method is used to get the spelling of a token into a /// preallocated buffer, instead of as an std::string. The caller is required @@ -665,12 +657,8 @@ public: /// if an internal buffer is returned. 
unsigned getSpelling(const Token &Tok, const char *&Buffer, bool *Invalid = 0) const { - return getSpelling(Tok, Buffer, SourceMgr, Features, Invalid); + return Lexer::getSpelling(Tok, Buffer, SourceMgr, Features, Invalid); } - static unsigned getSpelling(const Token &Tok, const char *&Buffer, - const SourceManager &SourceMgr, - const LangOptions &Features, - bool *Invalid = 0); /// getSpelling - This method is used to get the spelling of a token into a /// SmallVector. Note that the returned StringRef may not point to the diff --git a/lib/Lex/Lexer.cpp b/lib/Lex/Lexer.cpp index 3e68875768..da68495663 100644 --- a/lib/Lex/Lexer.cpp +++ b/lib/Lex/Lexer.cpp @@ -212,6 +212,107 @@ void Lexer::Stringify(llvm::SmallVectorImpl<char> &Str) { } } +//===----------------------------------------------------------------------===// +// Token Spelling +//===----------------------------------------------------------------------===// + +/// getSpelling() - Return the 'spelling' of this token. The spelling of a +/// token are the characters used to represent the token in the source file +/// after trigraph expansion and escaped-newline folding. In particular, this +/// wants to get the true, uncanonicalized, spelling of things like digraphs +/// UCNs, etc. +std::string Lexer::getSpelling(const Token &Tok, const SourceManager &SourceMgr, + const LangOptions &Features, bool *Invalid) { + assert((int)Tok.getLength() >= 0 && "Token character range is bogus!"); + + // If this token contains nothing interesting, return it directly. + bool CharDataInvalid = false; + const char* TokStart = SourceMgr.getCharacterData(Tok.getLocation(), + &CharDataInvalid); + if (Invalid) + *Invalid = CharDataInvalid; + if (CharDataInvalid) + return std::string(); + + if (!Tok.needsCleaning()) + return std::string(TokStart, TokStart+Tok.getLength()); + + std::string Result; + Result.reserve(Tok.getLength()); + + // Otherwise, hard case, relex the characters into the string. 
+ for (const char *Ptr = TokStart, *End = TokStart+Tok.getLength(); + Ptr != End; ) { + unsigned CharSize; + Result.push_back(Lexer::getCharAndSizeNoWarn(Ptr, CharSize, Features)); + Ptr += CharSize; + } + assert(Result.size() != unsigned(Tok.getLength()) && + "NeedsCleaning flag set on something that didn't need cleaning!"); + return Result; +} + +/// getSpelling - This method is used to get the spelling of a token into a +/// preallocated buffer, instead of as an std::string. The caller is required +/// to allocate enough space for the token, which is guaranteed to be at least +/// Tok.getLength() bytes long. The actual length of the token is returned. +/// +/// Note that this method may do two possible things: it may either fill in +/// the buffer specified with characters, or it may *change the input pointer* +/// to point to a constant buffer with the data already in it (avoiding a +/// copy). The caller is not allowed to modify the returned buffer pointer +/// if an internal buffer is returned. +unsigned Lexer::getSpelling(const Token &Tok, const char *&Buffer, + const SourceManager &SourceMgr, + const LangOptions &Features, bool *Invalid) { + assert((int)Tok.getLength() >= 0 && "Token character range is bogus!"); + + // If this token is an identifier, just return the string from the identifier + // table, which is very quick. + if (const IdentifierInfo *II = Tok.getIdentifierInfo()) { + Buffer = II->getNameStart(); + return II->getLength(); + } + + // Otherwise, compute the start of the token in the input lexer buffer. + const char *TokStart = 0; + + if (Tok.isLiteral()) + TokStart = Tok.getLiteralData(); + + if (TokStart == 0) { + bool CharDataInvalid = false; + TokStart = SourceMgr.getCharacterData(Tok.getLocation(), &CharDataInvalid); + if (Invalid) + *Invalid = CharDataInvalid; + if (CharDataInvalid) { + Buffer = ""; + return 0; + } + } + + // If this token contains nothing interesting, return it directly. 
+ if (!Tok.needsCleaning()) { + Buffer = TokStart; + return Tok.getLength(); + } + + // Otherwise, hard case, relex the characters into the string. + char *OutBuf = const_cast<char*>(Buffer); + for (const char *Ptr = TokStart, *End = TokStart+Tok.getLength(); + Ptr != End; ) { + unsigned CharSize; + *OutBuf++ = Lexer::getCharAndSizeNoWarn(Ptr, CharSize, Features); + Ptr += CharSize; + } + assert(unsigned(OutBuf-Buffer) != Tok.getLength() && + "NeedsCleaning flag set on something that didn't need cleaning!"); + + return OutBuf-Buffer; +} + + + static bool isWhitespace(unsigned char c); /// MeasureTokenLength - Relex the token at the specified location and return diff --git a/lib/Lex/LiteralSupport.cpp b/lib/Lex/LiteralSupport.cpp index 26bef2fdcc..7857ae440a 100644 --- a/lib/Lex/LiteralSupport.cpp +++ b/lib/Lex/LiteralSupport.cpp @@ -898,8 +898,8 @@ void StringLiteralParser::init(const Token *StringToks, unsigned NumStringToks){ // and 'spelled' tokens can only shrink. bool StringInvalid = false; unsigned ThisTokLen = - Preprocessor::getSpelling(StringToks[i], ThisTokBuf, SM, Features, - &StringInvalid); + Lexer::getSpelling(StringToks[i], ThisTokBuf, SM, Features, + &StringInvalid); if (StringInvalid) { hadError = 1; continue; @@ -1019,8 +1019,8 @@ unsigned StringLiteralParser::getOffsetOfStringByte(const Token &Tok, bool StringInvalid = false; const char *SpellingPtr = &SpellingBuffer[0]; - unsigned TokLen = Preprocessor::getSpelling(Tok, SpellingPtr, SM, Features, - &StringInvalid); + unsigned TokLen = Lexer::getSpelling(Tok, SpellingPtr, SM, Features, + &StringInvalid); if (StringInvalid) return 0; diff --git a/lib/Lex/Preprocessor.cpp b/lib/Lex/Preprocessor.cpp index e0eb6661d6..0b2e970bda 100644 --- a/lib/Lex/Preprocessor.cpp +++ b/lib/Lex/Preprocessor.cpp @@ -278,116 +278,6 @@ void Preprocessor::CodeCompleteNaturalLanguage() { CodeComplete->CodeCompleteNaturalLanguage(); } -//===----------------------------------------------------------------------===// -// Token 
Spelling -//===----------------------------------------------------------------------===// - -/// getSpelling() - Return the 'spelling' of this token. The spelling of a -/// token are the characters used to represent the token in the source file -/// after trigraph expansion and escaped-newline folding. In particular, this -/// wants to get the true, uncanonicalized, spelling of things like digraphs -/// UCNs, etc. -std::string Preprocessor::getSpelling(const Token &Tok, - const SourceManager &SourceMgr, - const LangOptions &Features, - bool *Invalid) { - assert((int)Tok.getLength() >= 0 && "Token character range is bogus!"); - - // If this token contains nothing interesting, return it directly. - bool CharDataInvalid = false; - const char* TokStart = SourceMgr.getCharacterData(Tok.getLocation(), - &CharDataInvalid); - if (Invalid) - *Invalid = CharDataInvalid; - if (CharDataInvalid) - return std::string(); - - if (!Tok.needsCleaning()) - return std::string(TokStart, TokStart+Tok.getLength()); - - std::string Result; - Result.reserve(Tok.getLength()); - - // Otherwise, hard case, relex the characters into the string. - for (const char *Ptr = TokStart, *End = TokStart+Tok.getLength(); - Ptr != End; ) { - unsigned CharSize; - Result.push_back(Lexer::getCharAndSizeNoWarn(Ptr, CharSize, Features)); - Ptr += CharSize; - } - assert(Result.size() != unsigned(Tok.getLength()) && - "NeedsCleaning flag set on something that didn't need cleaning!"); - return Result; -} - -/// getSpelling() - Return the 'spelling' of this token. The spelling of a -/// token are the characters used to represent the token in the source file -/// after trigraph expansion and escaped-newline folding. In particular, this -/// wants to get the true, uncanonicalized, spelling of things like digraphs -/// UCNs, etc. 
-std::string Preprocessor::getSpelling(const Token &Tok, bool *Invalid) const { - return getSpelling(Tok, SourceMgr, Features, Invalid); -} - -/// getSpelling - This method is used to get the spelling of a token into a -/// preallocated buffer, instead of as an std::string. The caller is required -/// to allocate enough space for the token, which is guaranteed to be at least -/// Tok.getLength() bytes long. The actual length of the token is returned. -/// -/// Note that this method may do two possible things: it may either fill in -/// the buffer specified with characters, or it may *change the input pointer* -/// to point to a constant buffer with the data already in it (avoiding a -/// copy). The caller is not allowed to modify the returned buffer pointer -/// if an internal buffer is returned. -unsigned Preprocessor::getSpelling(const Token &Tok, const char *&Buffer, - const SourceManager &SourceMgr, - const LangOptions &Features, - bool *Invalid) { - assert((int)Tok.getLength() >= 0 && "Token character range is bogus!"); - - // If this token is an identifier, just return the string from the identifier - // table, which is very quick. - if (const IdentifierInfo *II = Tok.getIdentifierInfo()) { - Buffer = II->getNameStart(); - return II->getLength(); - } - - // Otherwise, compute the start of the token in the input lexer buffer. - const char *TokStart = 0; - - if (Tok.isLiteral()) - TokStart = Tok.getLiteralData(); - - if (TokStart == 0) { - bool CharDataInvalid = false; - TokStart = SourceMgr.getCharacterData(Tok.getLocation(), &CharDataInvalid); - if (Invalid) - *Invalid = CharDataInvalid; - if (CharDataInvalid) { - Buffer = ""; - return 0; - } - } - - // If this token contains nothing interesting, return it directly. - if (!Tok.needsCleaning()) { - Buffer = TokStart; - return Tok.getLength(); - } - - // Otherwise, hard case, relex the characters into the string. 
- char *OutBuf = const_cast<char*>(Buffer); - for (const char *Ptr = TokStart, *End = TokStart+Tok.getLength(); - Ptr != End; ) { - unsigned CharSize; - *OutBuf++ = Lexer::getCharAndSizeNoWarn(Ptr, CharSize, Features); - Ptr += CharSize; - } - assert(unsigned(OutBuf-Buffer) != Tok.getLength() && - "NeedsCleaning flag set on something that didn't need cleaning!"); - - return OutBuf-Buffer; -} /// getSpelling - This method is used to get the spelling of a token into a /// SmallVector. Note that the returned StringRef may not point to the