// Token Spelling
//===----------------------------------------------------------------------===//
+/// \brief Slow case of getSpelling. Extract the characters comprising the
+/// spelling of this token from the provided input buffer.
+static size_t getSpellingSlow(const Token &Tok, const char *BufPtr,
+ const LangOptions &LangOpts, char *Spelling) {
+ assert(Tok.needsCleaning() && "getSpellingSlow called on simple token");
+
+ size_t Length = 0;
+ const char *BufEnd = BufPtr + Tok.getLength();
+
+ if (Tok.is(tok::string_literal)) {
+ // Munch the encoding-prefix and opening double-quote.
+ while (BufPtr < BufEnd) {
+ unsigned Size;
+ Spelling[Length++] = Lexer::getCharAndSizeNoWarn(BufPtr, Size, LangOpts);
+ BufPtr += Size;
+
+ if (Spelling[Length - 1] == '"')
+ break;
+ }
+
+ // Raw string literals need special handling; trigraph expansion and line
+ // splicing do not occur within their d-char-sequence nor within their
+ // r-char-sequence.
+ if (Length >= 2 &&
+ Spelling[Length - 2] == 'R' && Spelling[Length - 1] == '"') {
+ // Search backwards from the end of the token to find the matching closing
+ // quote.
+ const char *RawEnd = BufEnd;
+ do --RawEnd; while (*RawEnd != '"');
+ size_t RawLength = RawEnd - BufPtr + 1;
+
+ // Everything between the quotes is included verbatim in the spelling.
+ memcpy(Spelling + Length, BufPtr, RawLength);
+ Length += RawLength;
+ BufPtr += RawLength;
+
+ // The rest of the token is lexed normally.
+ }
+ }
+
+ while (BufPtr < BufEnd) {
+ unsigned Size;
+ Spelling[Length++] = Lexer::getCharAndSizeNoWarn(BufPtr, Size, LangOpts);
+ BufPtr += Size;
+ }
+
+ assert(Length < Tok.getLength() &&
+ "NeedsCleaning flag set on token that didn't need cleaning!");
+ return Length;
+}
+
/// getSpelling() - Return the 'spelling' of this token. The spelling of a
/// token are the characters used to represent the token in the source file
/// after trigraph expansion and escaped-newline folding. In particular, this
/// wants to get the true, uncanonicalized, spelling of things like digraphs
/// UCNs, etc.
StringRef Lexer::getSpelling(SourceLocation loc,
- SmallVectorImpl<char> &buffer,
- const SourceManager &SM,
- const LangOptions &options,
- bool *invalid) {
+ SmallVectorImpl<char> &buffer,
+ const SourceManager &SM,
+ const LangOptions &options,
+ bool *invalid) {
// Break down the source location.
std::pair<FileID, unsigned> locInfo = SM.getDecomposedLoc(loc);
// Common case: no need for cleaning.
if (!token.needsCleaning())
return StringRef(tokenBegin, length);
-
- // Hard case, we need to relex the characters into the string.
- buffer.clear();
- buffer.reserve(length);
-
- for (const char *ti = tokenBegin, *te = ti + length; ti != te; ) {
- unsigned charSize;
- buffer.push_back(Lexer::getCharAndSizeNoWarn(ti, charSize, options));
- ti += charSize;
- }
+ // Hard case, we need to relex the characters into the string.
+ buffer.resize(length);
+ buffer.resize(getSpellingSlow(token, tokenBegin, options, buffer.data()));
return StringRef(buffer.data(), buffer.size());
}
std::string Lexer::getSpelling(const Token &Tok, const SourceManager &SourceMgr,
const LangOptions &LangOpts, bool *Invalid) {
assert((int)Tok.getLength() >= 0 && "Token character range is bogus!");
-
- // If this token contains nothing interesting, return it directly.
+
bool CharDataInvalid = false;
- const char* TokStart = SourceMgr.getCharacterData(Tok.getLocation(),
+ const char *TokStart = SourceMgr.getCharacterData(Tok.getLocation(),
&CharDataInvalid);
if (Invalid)
*Invalid = CharDataInvalid;
if (CharDataInvalid)
return std::string();
-
+
+ // If this token contains nothing interesting, return it directly.
if (!Tok.needsCleaning())
- return std::string(TokStart, TokStart+Tok.getLength());
-
+ return std::string(TokStart, TokStart + Tok.getLength());
+
std::string Result;
- Result.reserve(Tok.getLength());
-
- // Otherwise, hard case, relex the characters into the string.
- for (const char *Ptr = TokStart, *End = TokStart+Tok.getLength();
- Ptr != End; ) {
- unsigned CharSize;
- Result.push_back(Lexer::getCharAndSizeNoWarn(Ptr, CharSize, LangOpts));
- Ptr += CharSize;
- }
- assert(Result.size() != unsigned(Tok.getLength()) &&
- "NeedsCleaning flag set on something that didn't need cleaning!");
+ Result.resize(Tok.getLength());
+ Result.resize(getSpellingSlow(Tok, TokStart, LangOpts, &*Result.begin()));
return Result;
}
}
// Otherwise, hard case, relex the characters into the string.
- char *OutBuf = const_cast<char*>(Buffer);
- for (const char *Ptr = TokStart, *End = TokStart+Tok.getLength();
- Ptr != End; ) {
- unsigned CharSize;
- *OutBuf++ = Lexer::getCharAndSizeNoWarn(Ptr, CharSize, LangOpts);
- Ptr += CharSize;
- }
- assert(unsigned(OutBuf-Buffer) != Tok.getLength() &&
- "NeedsCleaning flag set on something that didn't need cleaning!");
-
- return OutBuf-Buffer;
+ return getSpellingSlow(Tok, TokStart, LangOpts, const_cast<char*>(Buffer));
}