From: Ted Kremenek <kremenek@apple.com>
Date: Fri, 9 Jan 2009 22:05:30 +0000 (+0000)
Subject: Enhance PTH 'getSpelling' caching:
X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=5f074266cc59563036c40516c814d63825723e20;p=clang

Enhance PTH 'getSpelling' caching:
- Refactor caching logic into a helper class PTHSpellingSearch
- Allow "random accesses" in the spelling cache, thus catching the remaining
  cases where 'getSpelling' wasn't hitting the PTH cache

For -Eonly, PTH, Cocoa.h:
- This reduces wall time by 3% (user time unchanged, sys time reduced)
- This reduces the amount of paged source by 1112K.
  The remaining 1112K still being paged in is from somewhere else
  (investigating).


git-svn-id: https://llvm.org/svn/llvm-project/cfe/trunk@62009 91177308-0d34-0410-b5e6-96231b3b80d8
---

diff --git a/include/clang/Lex/PTHLexer.h b/include/clang/Lex/PTHLexer.h
index 8928ad4aa8..f19d67958f 100644
--- a/include/clang/Lex/PTHLexer.h
+++ b/include/clang/Lex/PTHLexer.h
@@ -20,8 +20,13 @@
 namespace clang {
   
 class PTHManager;
+class PTHSpellingSearch;
   
 class PTHLexer : public PreprocessorLexer {
+private:
+  /// FileID - The SourceManager FileID for the original source file.
+  unsigned FileID;
+  
   /// TokBuf - Buffer from PTH file containing raw token data.
   const char* TokBuf;
   
@@ -41,13 +46,10 @@ class PTHLexer : public PreprocessorLexer {
   /// CurPPCondPtr - Pointer inside PPCond that refers to the next entry
   ///  to process when doing quick skipping of preprocessor blocks.
   const char* CurPPCondPtr;
-  
-  /// Pointer to a side table containing offsets in the PTH file
-  ///  for token spellings.
-  const char* SpellingTable;
-  
-  /// Number of cached spellings left in the cached source file.
-  unsigned SpellingsLeft;
+
+  /// MySpellingSrch - Reference to the spelling search object used to get
+  ///  spellings for the source file indicated by \c FileID.
+  PTHSpellingSearch& MySpellingSrch;  
 
   PTHLexer(const PTHLexer&);  // DO NOT IMPLEMENT
   void operator=(const PTHLexer&); // DO NOT IMPLEMENT
@@ -59,14 +61,17 @@ class PTHLexer : public PreprocessorLexer {
   PTHManager& PTHMgr;
   
   Token EofToken;
-  
-public:  
+
+protected:
+  friend class PTHManager;
 
   /// Create a PTHLexer for the specified token stream.
   PTHLexer(Preprocessor& pp, SourceLocation fileloc, const char* D, 
-           const char* ppcond, const char* spellingTable, unsigned numSpellings,
+           const char* ppcond,
+           PTHSpellingSearch& mySpellingSrch,
            PTHManager& PM);
-  
+public:  
+
   ~PTHLexer() {}
     
   /// Lex - Return the next token.
diff --git a/include/clang/Lex/PTHManager.h b/include/clang/Lex/PTHManager.h
index fc814d0063..eefe574a54 100644
--- a/include/clang/Lex/PTHManager.h
+++ b/include/clang/Lex/PTHManager.h
@@ -16,6 +16,7 @@
 
 #include "clang/Lex/PTHLexer.h"
 #include "clang/Basic/LangOptions.h"
+#include "llvm/ADT/DenseMap.h"
 #include <string>
 
 namespace llvm {
@@ -28,14 +29,41 @@ class FileEntry;
 class IdentifierInfo;
 class IdentifierTable;
 class PTHLexer;
+class PTHManager;
+/// PTHSpellingSearch - Finds cached token spellings for a single source file.
+class PTHSpellingSearch {
+  PTHManager& PTHMgr;  // Used to fetch a spelling stored at a PTH offset.
  
-class PTHManager {
+  const char* TableBeg;  // Start of the spelling table.
+  const char* TableEnd;  // TableBeg plus numSpellings entries.
+  // State of the resumable linear search (see getSpellingLinearSearch).
+  unsigned SpellingsLeft;  // Entries the linear search has not consumed yet.
+  const char* LinearItr;  // Entry at which the next linear search begins.
+  
+public:
+  enum { SpellingEntrySize = 4*2 };  // Bytes per entry: two 32-bit values.
  
+  unsigned getSpellingBinarySearch(unsigned fpos, const char *&Buffer);
+  unsigned getSpellingLinearSearch(unsigned fpos, const char *&Buffer);
+  /// Create a search over \p numSpellings entries starting at \p tableBeg.
+  PTHSpellingSearch(PTHManager& pm, unsigned numSpellings, const char* tableBeg)
+    : PTHMgr(pm),
+      TableBeg(tableBeg),
+      TableEnd(tableBeg + numSpellings*SpellingEntrySize),
+      SpellingsLeft(numSpellings),
+      LinearItr(tableBeg) {}
+};  
+  
+class PTHManager {
   friend class PTHLexer;
+  friend class PTHSpellingSearch;
   
   /// The memory mapped PTH file.
   const llvm::MemoryBuffer* Buf;
   
+  /// A map from FileIDs to SpellingSearch objects.
+  llvm::DenseMap<unsigned,PTHSpellingSearch*> SpellingMap;
+  
   /// IdMap - A lazily generated cache mapping from persistent identifiers to
   ///  IdentifierInfo*.
   IdentifierInfo** PerIDCache;
@@ -70,10 +98,12 @@ class PTHManager {
   ///  objects from the PTH file.
   IdentifierInfo* GetIdentifierInfo(unsigned);
   
-  /// GetSpelling - Used by PTHLexer classes to get the cached spelling
-  ///  for a token.
-  unsigned GetSpelling(unsigned PTHOffset, const char*& Buffer);
+  /// getSpellingAtPTHOffset - Used by PTHLexer classes to get the cached 
+  ///  spelling for a token.
+  unsigned getSpellingAtPTHOffset(unsigned PTHOffset, const char*& Buffer);
 
+  unsigned getSpelling(unsigned FileID, unsigned fpos, const char *& Buffer);
+  
 public:
   
   ~PTHManager();
diff --git a/lib/Lex/PTHLexer.cpp b/lib/Lex/PTHLexer.cpp
index a982561883..a9eb88a19d 100644
--- a/lib/Lex/PTHLexer.cpp
+++ b/lib/Lex/PTHLexer.cpp
@@ -23,6 +23,7 @@
 #include "llvm/Support/MemoryBuffer.h"
 #include "llvm/ADT/StringMap.h"
 #include "llvm/ADT/OwningPtr.h"
+#include "llvm/Support/Streams.h"
 
 using namespace clang;
 
@@ -50,12 +51,14 @@ static inline uint32_t Read32(const char*& data) {
 
 PTHLexer::PTHLexer(Preprocessor& pp, SourceLocation fileloc, const char* D,
                    const char* ppcond,
-                   const char* spellingTable, unsigned NumSpellings,
+                   PTHSpellingSearch& mySpellingSrch,
                    PTHManager& PM)
   : PreprocessorLexer(&pp, fileloc), TokBuf(D), CurPtr(D), LastHashTokPtr(0),
-    PPCond(ppcond), CurPPCondPtr(ppcond), 
-    SpellingTable(spellingTable), SpellingsLeft(NumSpellings),
-    PTHMgr(PM) {}
+    PPCond(ppcond), CurPPCondPtr(ppcond), MySpellingSrch(mySpellingSrch),
+    PTHMgr(PM)
+{      
+  FileID = fileloc.getFileID();
+}
 
 void PTHLexer::Lex(Token& Tok) {
 LexNextToken:
@@ -100,6 +103,13 @@ LexNextToken:
   //===--------------------------------------==//
   // Process the token.
   //===--------------------------------------==//
+#if 0  
+  SourceManager& SM = PP->getSourceManager();
+  llvm::cerr << SM.getFileEntryForID(FileID)->getName()
+    << ':' << SM.getLogicalLineNumber(Tok.getLocation())
+    << ':' << SM.getLogicalColumnNumber(Tok.getLocation())
+    << '\n';
+#endif  
 
   if (k == tok::identifier) {
     MIOpt.ReadToken();
@@ -289,7 +299,25 @@ SourceLocation PTHLexer::getSourceLocation() {
   return SourceLocation::getFileLoc(FileID, offset);
 }
 
-unsigned PTHManager::GetSpelling(unsigned PTHOffset, const char *& Buffer) {
+//===----------------------------------------------------------------------===//
+// getSpelling() - Use cached data in PTH files for getSpelling().
+//===----------------------------------------------------------------------===//
+
+unsigned PTHManager::getSpelling(unsigned FileID, unsigned fpos,
+                                 const char *& Buffer) {
+  // Find the spelling searcher recorded for this file, if there is one.
+  typedef llvm::DenseMap<unsigned,PTHSpellingSearch*>::iterator SearchItr;
+  SearchItr Found = SpellingMap.find(FileID);
+  // A file with a searcher gets a random-access lookup of fpos.
+  if (Found != SpellingMap.end())
+    return Found->second->getSpellingBinarySearch(fpos, Buffer);
+
+  return 0;  // No searcher is recorded for FileID.
+}
+
+unsigned PTHManager::getSpellingAtPTHOffset(unsigned PTHOffset,
+                                            const char *& Buffer) {
+
   const char* p = Buf->getBufferStart() + PTHOffset;
   assert(p < Buf->getBufferEnd());
   
@@ -302,13 +330,15 @@ unsigned PTHManager::GetSpelling(unsigned PTHOffset, const char *& Buffer) {
   return len;
 }
 
-unsigned PTHLexer::getSpelling(SourceLocation sloc, const char *&Buffer) {
-  const char* p = SpellingTable;
-  SourceManager& SM = PP->getSourceManager();
-  unsigned fpos = SM.getFullFilePos(SM.getPhysicalLoc(sloc));
+unsigned PTHSpellingSearch::getSpellingLinearSearch(unsigned fpos,
+                                                    const char *&Buffer) {
+  const char* p = LinearItr;
   unsigned len = 0;
-
-  while (SpellingsLeft) {
+  
+  if (!SpellingsLeft)
+    return getSpellingBinarySearch(fpos, Buffer);
+  
+  do {
     uint32_t TokOffset = 
       ((uint32_t) ((uint8_t) p[0]))
       | (((uint32_t) ((uint8_t) p[1])) << 8)
@@ -316,7 +346,7 @@ unsigned PTHLexer::getSpelling(SourceLocation sloc, const char *&Buffer) {
       | (((uint32_t) ((uint8_t) p[3])) << 24);
     
     if (TokOffset > fpos)
-      break;
+      return getSpellingBinarySearch(fpos, Buffer);
     
     --SpellingsLeft;
     
@@ -328,18 +358,72 @@ unsigned PTHLexer::getSpelling(SourceLocation sloc, const char *&Buffer) {
         | (((uint32_t) ((uint8_t) p[6])) << 16)
         | (((uint32_t) ((uint8_t) p[7])) << 24);
       
-      len = PTHMgr.GetSpelling(SpellingPTHOffset, Buffer);
+      len = PTHMgr.getSpellingAtPTHOffset(SpellingPTHOffset, Buffer);
       break;
     }
 
     // No match.  Keep on looking.
     p += sizeof(uint32_t)*2;
   }
+  while (SpellingsLeft);
 
-  SpellingTable = p;
+  LinearItr = p;
   return len;
 }
 
+unsigned PTHSpellingSearch::getSpellingBinarySearch(unsigned fpos,
+                                                    const char *& Buffer) {
+  // Look up the entry whose token offset equals fpos; return 0 if none does.
+  assert ((TableEnd - TableBeg) % SpellingEntrySize == 0);
+  // The search relies on the table being sorted by ascending token offset.
+  unsigned min = 0;
+  const char* tb = TableBeg;
+  unsigned max = (TableEnd - tb) / SpellingEntrySize;
+  // Invariant: a matching entry, if present, has an index in [min, max).
+  while (min != max) {
+    unsigned i = (max - min) / 2 + min;
+    const char* p = tb + (i * SpellingEntrySize);
+    // Bytes 0-3 of an entry: the token's file offset, little-endian.
+    uint32_t TokOffset =
+      ((uint32_t) ((uint8_t) p[0]))
+      | (((uint32_t) ((uint8_t) p[1])) << 8)
+      | (((uint32_t) ((uint8_t) p[2])) << 16)
+      | (((uint32_t) ((uint8_t) p[3])) << 24);
+    
+    if (TokOffset > fpos) {
+      max = i;
+      continue;
+    }
+    // Entry i is too small: step past it so the range shrinks every pass.
+    if (TokOffset < fpos) {
+      min = i + 1;
+      continue;
+    }
+    // Bytes 4-7 of an entry: offset of the spelling within the PTH file.
+    uint32_t SpellingPTHOffset =
+        ((uint32_t) ((uint8_t) p[4]))
+        | (((uint32_t) ((uint8_t) p[5])) << 8)
+        | (((uint32_t) ((uint8_t) p[6])) << 16)
+        | (((uint32_t) ((uint8_t) p[7])) << 24);
+    
+    return PTHMgr.getSpellingAtPTHOffset(SpellingPTHOffset, Buffer);
+  }
+  
+  return 0;
+}
+
+unsigned PTHLexer::getSpelling(SourceLocation sloc, const char *&Buffer) {
+  // Reduce the location to a physical file ID and an offset in that file.
+  SourceManager& SrcMgr = PP->getSourceManager();
+  const SourceLocation PhysLoc = SrcMgr.getPhysicalLoc(sloc);
+  const unsigned LocFileID = SrcMgr.getCanonicalFileID(PhysLoc);
+  const unsigned FilePos = SrcMgr.getFullFilePos(PhysLoc);
+  // This lexer's own file uses its linear search; others go via PTHManager.
+  return LocFileID == FileID
+    ? MySpellingSrch.getSpellingLinearSearch(FilePos, Buffer)
+    : PTHMgr.getSpelling(LocFileID, FilePos, Buffer);
+}
+
 //===----------------------------------------------------------------------===//
 // Internal Data Structures for PTH file lookup and resolving identifiers.
 //===----------------------------------------------------------------------===//
@@ -538,6 +622,11 @@ PTHLexer* PTHManager::CreateLexer(unsigned FileID, const FileEntry* FE) {
   if (len == 0) spellingTable = 0;
 
   assert(data < Buf->getBufferEnd());
+  
+  // Create the SpellingSearch object for this FileID.
+  PTHSpellingSearch* ss = new PTHSpellingSearch(*this, len, spellingTable);
+  SpellingMap[FileID] = ss;
+  
   return new PTHLexer(PP, SourceLocation::getFileLoc(FileID, 0), data, ppcond,
-                      spellingTable, len, *this); 
+                      *ss, *this); 
 }