From: Ted Kremenek Date: Fri, 9 Jan 2009 22:05:30 +0000 (+0000) Subject: Enhance PTH 'getSpelling' caching: X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=5f074266cc59563036c40516c814d63825723e20;p=clang Enhance PTH 'getSpelling' caching: - Refactor caching logic into a helper class PTHSpellingSearch - Allow "random accesses" in the spelling cache, thus catching the remaining cases where 'getSpelling' wasn't hitting the PTH cache For -Eonly, PTH, Cocoa.h: - This reduces wall time by 3% (user time unchanged, sys time reduced) - This reduces the amount of paged source by 1112K. The remaining 1112K still being paged in is from somewhere else (investigating). git-svn-id: https://llvm.org/svn/llvm-project/cfe/trunk@62009 91177308-0d34-0410-b5e6-96231b3b80d8 --- diff --git a/include/clang/Lex/PTHLexer.h b/include/clang/Lex/PTHLexer.h index 8928ad4aa8..f19d67958f 100644 --- a/include/clang/Lex/PTHLexer.h +++ b/include/clang/Lex/PTHLexer.h @@ -20,8 +20,13 @@ namespace clang { class PTHManager; +class PTHSpellingSearch; class PTHLexer : public PreprocessorLexer { +private: + /// FileID - The SourceManager FileID for the original source file. + unsigned FileID; + /// TokBuf - Buffer from PTH file containing raw token data. const char* TokBuf; @@ -41,13 +46,10 @@ class PTHLexer : public PreprocessorLexer { /// CurPPCondPtr - Pointer inside PPCond that refers to the next entry /// to process when doing quick skipping of preprocessor blocks. const char* CurPPCondPtr; - - /// Pointer to a side table containing offsets in the PTH file - /// for token spellings. - const char* SpellingTable; - - /// Number of cached spellings left in the cached source file. - unsigned SpellingsLeft; + + /// MySpellingMgr - Reference to the spelling manager used to get spellings + /// for the source file indicated by \c FileID. + PTHSpellingSearch& MySpellingSrch; PTHLexer(const PTHLexer&); // DO NOT IMPLEMENT void operator=(const PTHLexer&); // DO NOT IMPLEMENT @@ -59,14 +61,17 @@ class PTHLexer : public PreprocessorLexer { PTHManager& PTHMgr; Token EofToken; - -public: + +protected: + friend class PTHManager; /// Create a PTHLexer for the specified token stream. PTHLexer(Preprocessor& pp, SourceLocation fileloc, const char* D, - const char* ppcond, const char* spellingTable, unsigned numSpellings, + const char* ppcond, + PTHSpellingSearch& mySpellingSrch, PTHManager& PM); - +public: + ~PTHLexer() {} /// Lex - Return the next token. diff --git a/include/clang/Lex/PTHManager.h b/include/clang/Lex/PTHManager.h index fc814d0063..eefe574a54 100644 --- a/include/clang/Lex/PTHManager.h +++ b/include/clang/Lex/PTHManager.h @@ -16,6 +16,7 @@ #include "clang/Lex/PTHLexer.h" #include "clang/Basic/LangOptions.h" +#include "llvm/ADT/DenseMap.h" #include namespace llvm { @@ -28,14 +29,41 @@ class FileEntry; class IdentifierInfo; class IdentifierTable; class PTHLexer; +class PTHManager; + +class PTHSpellingSearch { + PTHManager& PTHMgr; -class PTHManager { + const char* TableBeg; + const char* TableEnd; + + unsigned SpellingsLeft; + const char* LinearItr; + +public: + enum { SpellingEntrySize = 4*2 }; + unsigned getSpellingBinarySearch(unsigned fpos, const char *&Buffer); + unsigned getSpellingLinearSearch(unsigned fpos, const char *&Buffer); + + PTHSpellingSearch(PTHManager& pm, unsigned numSpellings, const char* tableBeg) + : PTHMgr(pm), + TableBeg(tableBeg), + TableEnd(tableBeg + numSpellings*SpellingEntrySize), + SpellingsLeft(numSpellings), + LinearItr(tableBeg) {} +}; + +class PTHManager { friend class PTHLexer; + friend class PTHSpellingSearch; /// The memory mapped PTH file. const llvm::MemoryBuffer* Buf; + /// A map from FileIDs to SpellingSearch objects. + llvm::DenseMap SpellingMap; + /// IdMap - A lazily generated cache mapping from persistent identifiers to /// IdentifierInfo*. IdentifierInfo** PerIDCache; @@ -70,10 +98,12 @@ class PTHManager { /// objects from the PTH file. IdentifierInfo* GetIdentifierInfo(unsigned); - /// GetSpelling - Used by PTHLexer classes to get the cached spelling - /// for a token. - unsigned GetSpelling(unsigned PTHOffset, const char*& Buffer); + /// getSpellingAtPTHOffset - Used by PTHLexer classes to get the cached + /// spelling for a token. + unsigned getSpellingAtPTHOffset(unsigned PTHOffset, const char*& Buffer); + unsigned getSpelling(unsigned FileID, unsigned fpos, const char *& Buffer); + public: ~PTHManager(); diff --git a/lib/Lex/PTHLexer.cpp b/lib/Lex/PTHLexer.cpp index a982561883..a9eb88a19d 100644 --- a/lib/Lex/PTHLexer.cpp +++ b/lib/Lex/PTHLexer.cpp @@ -23,6 +23,7 @@ #include "llvm/Support/MemoryBuffer.h" #include "llvm/ADT/StringMap.h" #include "llvm/ADT/OwningPtr.h" +#include "llvm/Support/Streams.h" using namespace clang; @@ -50,12 +51,14 @@ static inline uint32_t Read32(const char*& data) { PTHLexer::PTHLexer(Preprocessor& pp, SourceLocation fileloc, const char* D, const char* ppcond, - const char* spellingTable, unsigned NumSpellings, + PTHSpellingSearch& mySpellingSrch, PTHManager& PM) : PreprocessorLexer(&pp, fileloc), TokBuf(D), CurPtr(D), LastHashTokPtr(0), - PPCond(ppcond), CurPPCondPtr(ppcond), - SpellingTable(spellingTable), SpellingsLeft(NumSpellings), - PTHMgr(PM) {} + PPCond(ppcond), CurPPCondPtr(ppcond), MySpellingSrch(mySpellingSrch), + PTHMgr(PM) +{ + FileID = fileloc.getFileID(); +} void PTHLexer::Lex(Token& Tok) { LexNextToken: @@ -100,6 +103,13 @@ LexNextToken: //===--------------------------------------==// // Process the token. //===--------------------------------------==// +#if 0 + SourceManager& SM = PP->getSourceManager(); + llvm::cerr << SM.getFileEntryForID(FileID)->getName() + << ':' << SM.getLogicalLineNumber(Tok.getLocation()) + << ':' << SM.getLogicalColumnNumber(Tok.getLocation()) + << '\n'; +#endif if (k == tok::identifier) { MIOpt.ReadToken(); @@ -289,7 +299,25 @@ SourceLocation PTHLexer::getSourceLocation() { return SourceLocation::getFileLoc(FileID, offset); } -unsigned PTHManager::GetSpelling(unsigned PTHOffset, const char *& Buffer) { +//===----------------------------------------------------------------------===// +// getSpelling() - Use cached data in PTH files for getSpelling(). +//===----------------------------------------------------------------------===// + +unsigned PTHManager::getSpelling(unsigned FileID, unsigned fpos, + const char *& Buffer) { + + llvm::DenseMap::iterator I = + SpellingMap.find(FileID); + + if (I == SpellingMap.end()) + return 0; + + return I->second->getSpellingBinarySearch(fpos, Buffer); +} + +unsigned PTHManager::getSpellingAtPTHOffset(unsigned PTHOffset, + const char *& Buffer) { + const char* p = Buf->getBufferStart() + PTHOffset; assert(p < Buf->getBufferEnd()); @@ -302,13 +330,15 @@ unsigned PTHManager::GetSpelling(unsigned PTHOffset, const char *& Buffer) { return len; } -unsigned PTHLexer::getSpelling(SourceLocation sloc, const char *&Buffer) { - const char* p = SpellingTable; - SourceManager& SM = PP->getSourceManager(); - unsigned fpos = SM.getFullFilePos(SM.getPhysicalLoc(sloc)); +unsigned PTHSpellingSearch::getSpellingLinearSearch(unsigned fpos, + const char *&Buffer) { + const char* p = LinearItr; unsigned len = 0; - - while (SpellingsLeft) { + + if (!SpellingsLeft) + return getSpellingBinarySearch(fpos, Buffer); + + do { uint32_t TokOffset = ((uint32_t) ((uint8_t) p[0])) | (((uint32_t) ((uint8_t) p[1])) << 8) @@ -316,7 +346,7 @@ unsigned PTHLexer::getSpelling(SourceLocation sloc, const char *&Buffer) { | (((uint32_t) ((uint8_t) p[3])) << 24); if (TokOffset > fpos) - break; + return getSpellingBinarySearch(fpos, Buffer); --SpellingsLeft; @@ -328,18 +358,72 @@ unsigned PTHLexer::getSpelling(SourceLocation sloc, const char *&Buffer) { | (((uint32_t) ((uint8_t) p[6])) << 16) | (((uint32_t) ((uint8_t) p[7])) << 24); - len = PTHMgr.GetSpelling(SpellingPTHOffset, Buffer); + len = PTHMgr.getSpellingAtPTHOffset(SpellingPTHOffset, Buffer); break; } // No match. Keep on looking. p += sizeof(uint32_t)*2; } + while (SpellingsLeft); - SpellingTable = p; + LinearItr = p; return len; } +unsigned PTHSpellingSearch::getSpellingBinarySearch(unsigned fpos, + const char *& Buffer) { + + assert ((TableEnd - TableBeg) % SpellingEntrySize == 0); + + unsigned min = 0; + const char* tb = TableBeg; + unsigned max = (TableEnd - tb) / SpellingEntrySize; + + while (min != max) { + unsigned i = (max - min) / 2 + min; + const char* p = tb + (i * SpellingEntrySize); + + uint32_t TokOffset = + ((uint32_t) ((uint8_t) p[0])) + | (((uint32_t) ((uint8_t) p[1])) << 8) + | (((uint32_t) ((uint8_t) p[2])) << 16) + | (((uint32_t) ((uint8_t) p[3])) << 24); + + if (TokOffset > fpos) { + max = i; + continue; + } + + if (TokOffset < fpos) { + min = i; + continue; + } + + uint32_t SpellingPTHOffset = + ((uint32_t) ((uint8_t) p[4])) + | (((uint32_t) ((uint8_t) p[5])) << 8) + | (((uint32_t) ((uint8_t) p[6])) << 16) + | (((uint32_t) ((uint8_t) p[7])) << 24); + + return PTHMgr.getSpellingAtPTHOffset(SpellingPTHOffset, Buffer); + } + + return 0; +} + +unsigned PTHLexer::getSpelling(SourceLocation sloc, const char *&Buffer) { + SourceManager& SM = PP->getSourceManager(); + sloc = SM.getPhysicalLoc(sloc); + unsigned fid = SM.getCanonicalFileID(sloc); + unsigned fpos = SM.getFullFilePos(sloc); + + if (fid == FileID) + return MySpellingSrch.getSpellingLinearSearch(fpos, Buffer); + + return PTHMgr.getSpelling(fid, fpos, Buffer); +} + //===----------------------------------------------------------------------===// // Internal Data Structures for PTH file lookup and resolving identifiers. //===----------------------------------------------------------------------===// @@ -538,6 +622,11 @@ PTHLexer* PTHManager::CreateLexer(unsigned FileID, const FileEntry* FE) { if (len == 0) spellingTable = 0; assert(data < Buf->getBufferEnd()); + + // Create the SpellingSearch object for this FileID. + PTHSpellingSearch* ss = new PTHSpellingSearch(*this, len, spellingTable); + SpellingMap[FileID] = ss; + return new PTHLexer(PP, SourceLocation::getFileLoc(FileID, 0), data, ppcond, - spellingTable, len, *this); + *ss, *this); }