Emit32(ResolveID(T.getIdentifierInfo()));
Emit32(fpos);
- // For specific tokens we cache their spelling.
- if (T.getIdentifierInfo())
- return;
-
- switch (T.getKind()) {
- default:
- break;
- case tok::string_literal:
- case tok::wide_string_literal:
- case tok::angle_string_literal:
- case tok::numeric_constant:
- case tok::char_constant: {
- // FIXME: This uses the slow getSpelling(). Perhaps we do better
- // in the future? This only slows down PTH generation.
- const std::string& spelling = PP.getSpelling(T);
- const char* s = spelling.c_str();
-
- // Get the string entry.
- llvm::StringMapEntry<Offset> *E =
- &CachedStrs.GetOrCreateValue(s, s+spelling.size());
-
- // Store the address of the string entry in our spelling map.
- (*CurSpellMap).push_back(std::make_pair(fpos, E));
+ // Literals (strings, numbers, characters) get cached spellings.
+ if (T.isLiteral()) {
+ // FIXME: This uses the slow getSpelling(). Perhaps we do better
+ // in the future? This only slows down PTH generation.
+ const std::string &spelling = PP.getSpelling(T);
+ const char* s = spelling.c_str();
+
+ // Get the string entry.
+ llvm::StringMapEntry<Offset> *E =
+ &CachedStrs.GetOrCreateValue(s, s+spelling.size());
- break;
- }
+ // Store the address of the string entry in our spelling map.
+ CurSpellMap->push_back(std::make_pair(fpos, E));
}
}
// Avoid spelling identifiers, the most common form of token.
FirstChar = II->getName()[0];
} else if (!Tok.needsCleaning()) {
+ // FIXME: SPEED UP LITERALS!
SourceManager &SrcMgr = PP.getSourceManager();
FirstChar =
*SrcMgr.getCharacterData(SrcMgr.getSpellingLoc(Tok.getLocation()));
const char *Str = II->getName();
unsigned Len = Tok.needsCleaning() ? strlen(Str) : Tok.getLength();
OS.write(Str, Len);
+ // FIXME: ACCELERATE LITERALS
} else if (Tok.getLength() < 256) {
const char *TokPtr = Buffer;
unsigned Len = PP.getSpelling(Tok, TokPtr);
/// cached tokens doesn't get re-parsed and re-resolved after a backtrack is
/// invoked.
void AnnotateCachedTokens(const Token &Tok) {
- assert(Tok.isAnnotationToken() && "Expected annotation token");
+ assert(Tok.isAnnotation() && "Expected annotation token");
// Only forward to AnnotatePreviousCachedTokens when CachedLexPos is non-zero
// (which that routine asserts means some tokens have been cached) and
// backtracking is currently enabled; otherwise there is nothing to patch.
if (CachedLexPos != 0 && isBacktrackEnabled())
AnnotatePreviousCachedTokens(Tok);
}
return *SourceMgr.getCharacterData(SL);
}
- /// CreateString - Plop the specified string into a scratch buffer and return
- /// a location for it. If specified, the source location provides a source
- /// location for the token.
- SourceLocation CreateString(const char *Buf, unsigned Len,
- SourceLocation SourceLoc = SourceLocation());
+ /// CreateString - Plop the specified string into a scratch buffer and set the
+ /// specified token's location and length to it. If specified, the source
+ /// location provides a location of the instantiation point of the token.
+ void CreateString(const char *Buf, unsigned Len,
+ Token &Tok, SourceLocation SourceLoc = SourceLocation());
/// DumpToken - Print the token to stderr, used for debugging.
///
public:
ScratchBuffer(SourceManager &SM);
- /// getToken - Splat the specified text into a temporary MemoryBuffer and
- /// return a SourceLocation that refers to the token. The SourceLoc value
- /// gives a virtual location that the token will appear to be from.
- SourceLocation getToken(const char *Buf, unsigned Len,
- SourceLocation SourceLoc);
-
/// getToken - Splat the specified text into a temporary MemoryBuffer and
/// return a SourceLocation that refers to the token. The returned location
/// indicates the physloc of the token, and DestPtr is set to point at the
/// copied text inside the scratch buffer.
- SourceLocation getToken(const char *Buf, unsigned Len);
+ SourceLocation getToken(const char *Buf, unsigned Len, const char *&DestPtr);
private:
void AllocScratchBuffer(unsigned RequestLen);
/// The location of the token.
SourceLocation Loc;
- // Conceptually these next two fields could be in a union with
- // access depending on isAnnotationToken(). However, this causes gcc
- // 4.2 to pessimize LexTokenInternal, a very performance critical
- // routine. Keeping as separate members with casts until a more
- // beautiful fix presents itself.
+ // Conceptually these next two fields could be in a union. However, this
+ // causes gcc 4.2 to pessimize LexTokenInternal, a very performance critical
+ // routine. Keeping as separate members with casts until a more beautiful fix
+ // presents itself.
/// UintData - This holds either the length of the token text, when
/// a normal token, or the end of the SourceRange when an annotation
/// token.
unsigned UintData;
- /// PtrData - For normal tokens, this points to the uniqued
- /// information for the identifier (if an identifier token) or
- /// null. For annotation tokens, this points to information specific
- /// to the annotation token.
+ /// PtrData - This is a union of four different pointer types, which depends
+ /// on what type of token this is:
+ /// Identifiers, keywords, etc:
+ /// This is an IdentifierInfo*, which contains the uniqued identifier
+ /// spelling.
+ /// Literals: isLiteral() returns true.
+ /// This is a pointer to the start of the token in a text buffer, which
+ /// may be dirty (have trigraphs / escaped newlines).
+ /// Annotations (resolved type names, C++ scopes, etc): isAnnotation().
+ /// This is a pointer to sema-specific data for the annotation token.
+ /// Other:
+ /// This is null.
void *PtrData;
/// Kind - The actual flavor of token this is.
bool is(tok::TokenKind K) const { return Kind == (unsigned) K; } // True when this token's kind is K.
bool isNot(tok::TokenKind K) const { return Kind != (unsigned) K; } // True when this token's kind is not K.
- bool isAnnotationToken() const {
+ /// isLiteral - Return true if this is a "literal", like a numeric
+ /// constant, string, etc.
/// The kinds treated as literals are numeric_constant, char_constant,
/// string_literal, wide_string_literal and angle_string_literal.
+ bool isLiteral() const {
+ return is(tok::numeric_constant) || is(tok::char_constant) ||
+ is(tok::string_literal) || is(tok::wide_string_literal) ||
+ is(tok::angle_string_literal);
+ }
+
/// isAnnotation - Return true if this is one of the annotation token kinds
/// (annot_typename, annot_cxxscope or annot_template_id).
+ bool isAnnotation() const {
return is(tok::annot_typename) ||
is(tok::annot_cxxscope) ||
is(tok::annot_template_id);
}
-
+
/// getLocation - Return the source location recorded for this token (the
/// Loc field).
SourceLocation getLocation() const { return Loc; }
/// getLength - Return the length of the token text. Only valid for normal
/// (non-annotation) tokens: for annotation tokens UintData holds the end
/// location instead, so this asserts.
unsigned getLength() const {
- assert(!isAnnotationToken() && "Annotation tokens have no length field");
+ assert(!isAnnotation() && "Annotation tokens have no length field");
return UintData;
}
/// setLocation - Record L as this token's source location.
void setLocation(SourceLocation L) { Loc = L; }
/// setLength - Store Len as the length of the token text in UintData.
/// Asserts on annotation tokens, which use UintData for their end location.
void setLength(unsigned Len) {
- assert(!isAnnotationToken() && "Annotation tokens have no length field");
+ assert(!isAnnotation() && "Annotation tokens have no length field");
UintData = Len;
}
/// getAnnotationEndLoc - Return the end location of an annotation token,
/// decoded from the raw encoding stored in UintData. Asserts if this token
/// is not an annotation.
SourceLocation getAnnotationEndLoc() const {
- assert(isAnnotationToken() && "Used AnnotEndLocID on non-annotation token");
+ assert(isAnnotation() && "Used AnnotEndLocID on non-annotation token");
return SourceLocation::getFromRawEncoding(UintData);
}
/// setAnnotationEndLoc - Store the raw encoding of L in UintData as the end
/// location of an annotation token. Asserts if this token is not an
/// annotation.
void setAnnotationEndLoc(SourceLocation L) {
- assert(isAnnotationToken() && "Used AnnotEndLocID on non-annotation token");
+ assert(isAnnotation() && "Used AnnotEndLocID on non-annotation token");
UintData = L.getRawEncoding();
}
}
/// getIdentifierInfo - Return PtrData viewed as an IdentifierInfo*. Asserts
/// on annotation tokens, and returns null for literal tokens because their
/// PtrData holds a pointer to the literal text rather than an IdentifierInfo.
IdentifierInfo *getIdentifierInfo() const {
- assert(!isAnnotationToken() && "Used IdentInfo on annotation token");
+ assert(!isAnnotation() && "Used IdentInfo on annotation token!");
+ if (isLiteral()) return 0;
return (IdentifierInfo*) PtrData;
}
/// setIdentifierInfo - Store II in PtrData. Note that, unlike
/// setLiteralData and setAnnotationValue, this performs no check on the
/// token kind.
void setIdentifierInfo(IdentifierInfo *II) {
PtrData = (void*) II;
}
-
+
+ /// getLiteralData - For a literal token (numeric constant, string, etc), this
+ /// returns a pointer to the start of it in the text buffer if known, null
+ /// otherwise.
/// The pointer is whatever was last stored in PtrData (see setLiteralData);
/// asserts if this token is not a literal.
+ const char *getLiteralData() const {
+ assert(isLiteral() && "Cannot get literal data of non-literal");
+ return reinterpret_cast<const char*>(PtrData);
+ }
/// setLiteralData - Store Ptr in PtrData as the start of this literal's
/// text. Asserts if this token is not a literal, so the token kind must be
/// set before calling this.
+ void setLiteralData(const char *Ptr) {
+ assert(isLiteral() && "Cannot set literal data of non-literal");
+ PtrData = (void*)Ptr;
+ }
+
/// getAnnotationValue - Return the annotation-specific pointer stored in
/// PtrData. Asserts if this token is not an annotation.
void *getAnnotationValue() const {
- assert(isAnnotationToken() && "Used AnnotVal on non-annotation token");
+ assert(isAnnotation() && "Used AnnotVal on non-annotation token");
return PtrData;
}
/// setAnnotationValue - Store val in PtrData as the annotation-specific
/// pointer. Asserts if this token is not an annotation.
void setAnnotationValue(void *val) {
- assert(isAnnotationToken() && "Used AnnotVal on non-annotation token");
+ assert(isAnnotation() && "Used AnnotVal on non-annotation token");
PtrData = val;
}
if (!Tok.is(tok::identifier))
return false;
- if (Actions.isTypeName(*Tok.getIdentifierInfo(), CurScope))
+ IdentifierInfo *II = Tok.getIdentifierInfo();
+ if (Actions.isTypeName(*II, CurScope))
return true;
- return Tok.getIdentifierInfo() == Ident_super;
+ return II == Ident_super;
}
OwningExprResult ParseObjCAtExpression(SourceLocation AtLocation);
return LexNumericConstant(Result, ConsumeChar(CurPtr, Size, Result));
// Update the location of token as well as BufferPtr.
+ const char *TokStart = BufferPtr;
FormTokenWithChars(Result, CurPtr, tok::numeric_constant);
+ Result.setLiteralData(TokStart);
}
/// LexStringLiteral - Lex the remainder of a string literal, after having lexed
Diag(NulCharacter, diag::null_in_string);
// Update the location of the token as well as the BufferPtr instance var.
+ const char *TokStart = BufferPtr;
FormTokenWithChars(Result, CurPtr,
Wide ? tok::wide_string_literal : tok::string_literal);
+ Result.setLiteralData(TokStart);
}
/// LexAngledStringLiteral - Lex the remainder of an angled string literal,
Diag(NulCharacter, diag::null_in_string);
// Update the location of token as well as BufferPtr.
+ const char *TokStart = BufferPtr;
FormTokenWithChars(Result, CurPtr, tok::angle_string_literal);
+ Result.setLiteralData(TokStart);
}
Diag(NulCharacter, diag::null_in_char);
// Update the location of token as well as BufferPtr.
+ const char *TokStart = BufferPtr;
FormTokenWithChars(Result, CurPtr, tok::char_constant);
+ Result.setLiteralData(TokStart);
}
/// SkipWhitespace - Efficiently skip over a series of whitespace characters.
Spelling += "*/"; // add suffix.
Result.setKind(tok::comment);
- Result.setLocation(PP->CreateString(&Spelling[0], Spelling.size(),
- Result.getLocation()));
- Result.setLength(Spelling.size());
+ PP->CreateString(&Spelling[0], Spelling.size(), Result,
+ Result.getLocation());
return true;
}
}
}
- Tok.setLength(Result.size());
- Tok.setLocation(PP.CreateString(&Result[0], Result.size()));
+ PP.CreateString(&Result[0], Result.size(), Tok);
return Tok;
}
}
void Preprocessor::AnnotatePreviousCachedTokens(const Token &Tok) {
- assert(Tok.isAnnotationToken() && "Expected annotation token");
+ assert(Tok.isAnnotation() && "Expected annotation token");
assert(CachedLexPos != 0 && "Expected to have some cached tokens");
assert(CachedTokens[CachedLexPos-1].getLocation() == Tok.getAnnotationEndLoc()
&& "The annotation should be until the most recent cached token");
char TmpBuffer[100];
sprintf(TmpBuffer, "\"%s %2d %4d\"", Months[TM->tm_mon], TM->tm_mday,
TM->tm_year+1900);
- DATELoc = PP.CreateString(TmpBuffer, strlen(TmpBuffer));
+
+ Token TmpTok;
+ TmpTok.startToken();
+ PP.CreateString(TmpBuffer, strlen(TmpBuffer), TmpTok);
+ DATELoc = TmpTok.getLocation();
sprintf(TmpBuffer, "\"%02d:%02d:%02d\"", TM->tm_hour, TM->tm_min, TM->tm_sec);
- TIMELoc = PP.CreateString(TmpBuffer, strlen(TmpBuffer));
+ PP.CreateString(TmpBuffer, strlen(TmpBuffer), TmpTok);
+ TIMELoc = TmpTok.getLocation();
}
/// ExpandBuiltinMacro - If an identifier token is read that is to be expanded
SourceMgr.getInstantiationLineNumber(Tok.getLocation()));
unsigned Length = strlen(TmpBuffer)-1;
Tok.setKind(tok::numeric_constant);
- Tok.setLength(Length);
- Tok.setLocation(CreateString(TmpBuffer, Length+1, Tok.getLocation()));
+ CreateString(TmpBuffer, Length+1, Tok, Tok.getLocation());
+ Tok.setLength(Length); // Trim off space.
} else if (II == Ident__FILE__ || II == Ident__BASE_FILE__) {
SourceLocation Loc = Tok.getLocation();
if (II == Ident__BASE_FILE__) {
std::string FN =SourceMgr.getSourceName(SourceMgr.getInstantiationLoc(Loc));
FN = '"' + Lexer::Stringify(FN) + '"';
Tok.setKind(tok::string_literal);
- Tok.setLength(FN.size());
- Tok.setLocation(CreateString(&FN[0], FN.size(), Tok.getLocation()));
+ CreateString(&FN[0], FN.size(), Tok, Tok.getLocation());
} else if (II == Ident__DATE__) {
if (!DATELoc.isValid())
ComputeDATE_TIME(DATELoc, TIMELoc, *this);
sprintf(TmpBuffer, "%u ", Depth);
unsigned Length = strlen(TmpBuffer)-1;
Tok.setKind(tok::numeric_constant);
- Tok.setLength(Length);
- Tok.setLocation(CreateString(TmpBuffer, Length, Tok.getLocation()));
+ CreateString(TmpBuffer, Length, Tok, Tok.getLocation());
+ Tok.setLength(Length); // Trim off space.
} else if (II == Ident__TIMESTAMP__) {
// MSVC, ICC, GCC, VisualAge C++ extension. The generated string should be
// of the form "Ddd Mmm dd hh::mm::ss yyyy", which is returned by asctime.
unsigned Len = strlen(TmpBuffer);
TmpBuffer[Len-1] = '"'; // Replace the newline with a quote.
Tok.setKind(tok::string_literal);
- Tok.setLength(Len);
- Tok.setLocation(CreateString(TmpBuffer, Len+1, Tok.getLocation()));
+ CreateString(TmpBuffer, Len+1, Tok, Tok.getLocation());
+ Tok.setLength(Len); // Trim off space.
} else {
assert(0 && "Unknown identifier!");
}
// Plop the string (including the newline and trailing null) into a buffer
// where we can lex it.
- SourceLocation TokLoc = CreateString(&StrVal[0], StrVal.size());
+ Token TmpTok;
+ TmpTok.startToken();
+ CreateString(&StrVal[0], StrVal.size(), TmpTok);
+ SourceLocation TokLoc = TmpTok.getLocation();
// Make and enter a lexer object so that we lex and expand the tokens just
// like any others.
}
// Otherwise, compute the start of the token in the input lexer buffer.
- const char *TokStart = SourceMgr.getCharacterData(Tok.getLocation());
+ const char *TokStart = 0;
+
+ if (Tok.isLiteral())
+ TokStart = Tok.getLiteralData();
+
+ if (TokStart == 0)
+ TokStart = SourceMgr.getCharacterData(Tok.getLocation());
// If this token contains nothing interesting, return it directly.
if (!Tok.needsCleaning()) {
Buffer = TokStart;
return Tok.getLength();
}
+
// Otherwise, hard case, relex the characters into the string.
char *OutBuf = const_cast<char*>(Buffer);
for (const char *Ptr = TokStart, *End = TokStart+Tok.getLength();
/// CreateString - Plop the specified string into a scratch buffer and set the
/// specified token's length and location to it. If InstantiationLoc is valid,
/// the token's location is an instantiation location that maps the
/// scratch-buffer text to InstantiationLoc; otherwise it is the raw
/// scratch-buffer location. For literal tokens the literal data pointer is
/// also set to the copied text.
-SourceLocation Preprocessor::
-CreateString(const char *Buf, unsigned Len, SourceLocation SLoc) {
- if (SLoc.isValid())
- return ScratchBuf->getToken(Buf, Len, SLoc);
- return ScratchBuf->getToken(Buf, Len);
+void Preprocessor::CreateString(const char *Buf, unsigned Len, Token &Tok,
+ SourceLocation InstantiationLoc) {
+ Tok.setLength(Len);
+
+ const char *DestPtr;
+ SourceLocation Loc = ScratchBuf->getToken(Buf, Len, DestPtr);
+
+ if (InstantiationLoc.isValid())
+ Loc = SourceMgr.createInstantiationLoc(Loc, InstantiationLoc, Len);
+ Tok.setLocation(Loc);
+
// isLiteral() inspects the token kind, so the caller must have set Tok's kind
// before calling CreateString for the pointer below to be recorded.
+ // If this is a literal token, set the pointer data.
+ if (Tok.isLiteral())
+ Tok.setLiteralData(DestPtr);
}
/// return a SourceLocation that refers to the token. The returned location
/// indicates the physloc of the token, and DestPtr is set to point at the
/// copied text inside the scratch buffer.
-SourceLocation ScratchBuffer::getToken(const char *Buf, unsigned Len,
+SourceLocation ScratchBuffer::getToken(const char *Buf, unsigned Len,
+ const char *&DestPtr) {
// Ask AllocScratchBuffer for room when Len more bytes would not fit within
// ScratchBufSize.
if (BytesUsed+Len > ScratchBufSize)
AllocScratchBuffer(Len);
+ // Return a pointer to the character data.
+ DestPtr = CurBuffer+BytesUsed;
+
// Copy the token data into the buffer.
memcpy(CurBuffer+BytesUsed, Buf, Len);
// NOTE(review): this excerpt shows no update to BytesUsed between the copy
// and the return; presumably it is advanced by Len in lines not shown here,
// which is what makes BytesUsed-Len the offset of the copied text - confirm.
return BufferStartLoc.getFileLocWithOffset(BytesUsed-Len);
}
-
-/// getToken - Splat the specified text into a temporary MemoryBuffer and
-/// return a SourceLocation that refers to the token. The SourceLoc value
-/// gives a virtual location that the token will appear to be from.
-SourceLocation ScratchBuffer::getToken(const char *Buf, unsigned Len,
- SourceLocation SourceLoc) {
- // Map the physloc to the specified sourceloc.
- return SourceMgr.createInstantiationLoc(getToken(Buf, Len), SourceLoc, Len);
-}
-
void ScratchBuffer::AllocScratchBuffer(unsigned RequestLen) {
// Only pay attention to the requested length if it is larger than our default
// page size. If it is, we allocate an entire chunk for it. This is to
/// If this returns true, the caller should immediately return the token.
bool TokenLexer::PasteTokens(Token &Tok) {
llvm::SmallVector<char, 128> Buffer;
+ const char *ResultTokStrPtr = 0;
do {
// Consume the ## operator.
SourceLocation PasteOpLoc = Tokens[CurToken].getLocation();
// Plop the pasted result (including the trailing newline and null) into a
// scratch buffer where we can lex it.
- SourceLocation ResultTokLoc = PP.CreateString(&Buffer[0], Buffer.size());
+ Token ResultTokTmp;
+ ResultTokTmp.startToken();
+ // Claim that the tmp token is a string_literal so that we can get the
+ // character pointer back from CreateString.
+ ResultTokTmp.setKind(tok::string_literal);
+ PP.CreateString(&Buffer[0], Buffer.size(), ResultTokTmp);
+ SourceLocation ResultTokLoc = ResultTokTmp.getLocation();
+ ResultTokStrPtr = ResultTokTmp.getLiteralData();
+
// Lex the resultant pasted token into Result.
Token Result;
assert(ResultTokLoc.isFileID() &&
"Should be a raw location into scratch buffer");
SourceManager &SourceMgr = PP.getSourceManager();
- std::pair<FileID, unsigned> LocInfo =
- SourceMgr.getDecomposedLoc(ResultTokLoc);
+ FileID LocFileID = SourceMgr.getFileID(ResultTokLoc);
- const char *ScratchBufStart =SourceMgr.getBufferData(LocInfo.first).first;
+ const char *ScratchBufStart = SourceMgr.getBufferData(LocFileID).first;
// Make a lexer to lex this string from. Lex just this one token.
- const char *ResultStrData = ScratchBufStart+LocInfo.second;
-
// Make a lexer object so that we lex and expand the paste result.
- Lexer TL(SourceMgr.getLocForStartOfFile(LocInfo.first),
- PP.getLangOptions(),
- ScratchBufStart,
- ResultStrData,
- ResultStrData+LHSLen+RHSLen /*don't include null*/);
+ Lexer TL(SourceMgr.getLocForStartOfFile(LocFileID),
+ PP.getLangOptions(), ScratchBufStart,
+ ResultTokStrPtr,
+ ResultTokStrPtr+LHSLen+RHSLen /*don't include null*/);
// Lex a token in raw mode. This way it won't look up identifiers
// automatically, lexing off the end will return an eof token, and
RHS.is(tok::slash)) {
HandleMicrosoftCommentPaste(Tok);
return true;
- } else {
- // TODO: If not in assembler language mode.
- PP.Diag(PasteOpLoc, diag::err_pp_bad_paste)
- << std::string(Buffer.begin(), Buffer.end()-1);
- return false;
}
+
+ // TODO: If not in assembler language mode.
+ PP.Diag(PasteOpLoc, diag::err_pp_bad_paste)
+ << std::string(Buffer.begin(), Buffer.end()-1);
+ return false;
}
// Turn ## into 'unknown' to avoid # ## # from looking like a paste
if (Tok.is(tok::identifier)) {
// Look up the identifier info for the token. We disabled identifier lookup
// by saying we're skipping contents, so we need to do this manually.
- Tok.setIdentifierInfo(PP.LookUpIdentifierInfo(Tok));
+ Tok.setIdentifierInfo(PP.LookUpIdentifierInfo(Tok, ResultTokStrPtr));
}
return false;
}
TokenRewriter::token_iterator
-TokenRewriter::AddTokenBefore(token_iterator I, const char *Val){
+TokenRewriter::AddTokenBefore(token_iterator I, const char *Val) {
unsigned Len = strlen(Val);
// Plop the string into the scratch buffer, then create a token for this
// string.
Token Tok;
Tok.startToken();
- Tok.setLocation(ScratchBuf->getToken(Val, Len));
+ const char *Spelling;
+ Tok.setLocation(ScratchBuf->getToken(Val, Len, Spelling));
Tok.setLength(Len);
// TODO: Form a whole lexer around this and relex the token! For now, just