From d9d2b679d0728ea7f539f38aaea38e26b8b08043 Mon Sep 17 00:00:00 2001 From: Argyrios Kyrtzidis Date: Sun, 21 Aug 2011 23:33:04 +0000 Subject: [PATCH] Boost the efficiency of SourceManager::getMacroArgExpandedLocation. Currently getMacroArgExpandedLocation is very inefficient and for the case of a location pointing at the main file it will end up checking almost all of the SLocEntries. Make it faster: -Use a map of macro argument chunks to their expanded source location. The map is for a single source file, it's stored in the file's ContentCache and lazily computed, like the source lines cache. -In SLocEntry's FileInfo add an 'unsigned NumCreatedFIDs' field that keeps track of the number of FileIDs (files and macros) that were created during preprocessing of that particular file SLocEntry. This is useful when computing the macro argument map in skipping included files while scanning for macro arg FileIDs that lexed from a specific source file. Due to padding, the new field does not increase the size of SLocEntry. 
git-svn-id: https://llvm.org/svn/llvm-project/cfe/trunk@138225 91177308-0d34-0410-b5e6-96231b3b80d8 --- include/clang/Basic/SourceManager.h | 61 ++++++++++- include/clang/Lex/PreprocessorLexer.h | 14 ++- lib/Basic/SourceManager.cpp | 140 +++++++++++++++++++------- lib/Lex/PPLexerChange.cpp | 10 ++ lib/Lex/PreprocessorLexer.cpp | 8 ++ lib/Serialization/ASTReader.cpp | 6 +- lib/Serialization/ASTWriter.cpp | 3 + 7 files changed, 193 insertions(+), 49 deletions(-) diff --git a/include/clang/Basic/SourceManager.h b/include/clang/Basic/SourceManager.h index beff66e675..1cc02df802 100644 --- a/include/clang/Basic/SourceManager.h +++ b/include/clang/Basic/SourceManager.h @@ -23,6 +23,7 @@ #include "llvm/ADT/IntrusiveRefCntPtr.h" #include "llvm/ADT/DenseMap.h" #include "llvm/Support/MemoryBuffer.h" +#include <map> #include <vector> #include <cassert> @@ -34,6 +35,8 @@ class FileManager; class FileEntry; class LineTableInfo; class LangOptions; +class ASTWriter; +class ASTReader; /// SrcMgr - Public enums and private classes that are part of the /// SourceManager implementation. @@ -85,6 +88,11 @@ namespace SrcMgr { /// if SourceLineCache is non-null. unsigned NumLines; + /// \brief Lazily computed map of macro argument chunks to their expanded + /// source location. + typedef std::map<unsigned, SourceLocation> MacroArgsMap; + MacroArgsMap *MacroArgsCache; + /// getBuffer - Returns the memory buffer for the associated content. 
/// /// \param Diag Object through which diagnostics will be emitted if the @@ -142,11 +150,11 @@ namespace SrcMgr { ContentCache(const FileEntry *Ent = 0) : Buffer(0, false), OrigEntry(Ent), ContentsEntry(Ent), - SourceLineCache(0), NumLines(0) {} + SourceLineCache(0), NumLines(0), MacroArgsCache(0) {} ContentCache(const FileEntry *Ent, const FileEntry *contentEnt) : Buffer(0, false), OrigEntry(Ent), ContentsEntry(contentEnt), - SourceLineCache(0), NumLines(0) {} + SourceLineCache(0), NumLines(0), MacroArgsCache(0) {} ~ContentCache(); @@ -154,12 +162,13 @@ namespace SrcMgr { /// a non-NULL Buffer or SourceLineCache. Ownership of allocated memory /// is not transferred, so this is a logical error. ContentCache(const ContentCache &RHS) - : Buffer(0, false), SourceLineCache(0) + : Buffer(0, false), SourceLineCache(0), MacroArgsCache(0) { OrigEntry = RHS.OrigEntry; ContentsEntry = RHS.ContentsEntry; - assert (RHS.Buffer.getPointer() == 0 && RHS.SourceLineCache == 0 + assert (RHS.Buffer.getPointer() == 0 && RHS.SourceLineCache == 0 && + RHS.MacroArgsCache == 0 && "Passed ContentCache object cannot own a buffer."); NumLines = RHS.NumLines; @@ -184,16 +193,26 @@ namespace SrcMgr { /// This is an invalid SLOC for the main file (top of the #include chain). unsigned IncludeLoc; // Really a SourceLocation + /// \brief Number of FileIDs (files and macros) that were created during + /// preprocessing of this #include, including this SLocEntry. + /// Zero means the preprocessor didn't provide such info for this SLocEntry. + unsigned NumCreatedFIDs; + /// Data - This contains the ContentCache* and the bits indicating the /// characteristic of the file and whether it has #line info, all bitmangled /// together. uintptr_t Data; + + friend class SourceManager; + friend class ASTWriter; + friend class ASTReader; public: /// get - Return a FileInfo object. 
static FileInfo get(SourceLocation IL, const ContentCache *Con, CharacteristicKind FileCharacter) { FileInfo X; X.IncludeLoc = IL.getRawEncoding(); + X.NumCreatedFIDs = 0; X.Data = (uintptr_t)Con; assert((X.Data & 7) == 0 &&"ContentCache pointer insufficiently aligned"); assert((unsigned)FileCharacter < 4 && "invalid file character"); @@ -711,6 +730,28 @@ public: /// \param Invalid If non-NULL, will be set true if an error occurred. StringRef getBufferData(FileID FID, bool *Invalid = 0) const; + /// \brief Get the number of FileIDs (files and macros) that were created + /// during preprocessing of \arg FID, including it. + unsigned getNumCreatedFIDsForFileID(FileID FID) const { + bool Invalid = false; + const SrcMgr::SLocEntry &Entry = getSLocEntry(FID, &Invalid); + if (Invalid || !Entry.isFile()) + return 0; + + return Entry.getFile().NumCreatedFIDs; + } + + /// \brief Set the number of FileIDs (files and macros) that were created + /// during preprocessing of \arg FID, including it. + void setNumCreatedFIDsForFileID(FileID FID, unsigned NumFIDs) const { + bool Invalid = false; + const SrcMgr::SLocEntry &Entry = getSLocEntry(FID, &Invalid); + if (Invalid || !Entry.isFile()) + return; + + assert(Entry.getFile().NumCreatedFIDs == 0 && "Already set!"); + const_cast<SrcMgr::FileInfo &>(Entry.getFile()).NumCreatedFIDs = NumFIDs; + } //===--------------------------------------------------------------------===// // SourceLocation manipulation methods. @@ -743,6 +784,17 @@ public: return SourceLocation::getFileLoc(FileOffset); } + /// \brief Returns the include location if \arg FID is a #include'd file + /// otherwise it returns an invalid location. 
+ SourceLocation getIncludeLoc(FileID FID) const { + bool Invalid = false; + const SrcMgr::SLocEntry &Entry = getSLocEntry(FID, &Invalid); + if (Invalid || !Entry.isFile()) + return SourceLocation(); + + return Entry.getFile().getIncludeLoc(); + } + /// getExpansionLoc - Given a SourceLocation object, return the expansion /// location referenced by the ID. SourceLocation getExpansionLoc(SourceLocation Loc) const { @@ -1205,6 +1257,7 @@ private: std::pair<FileID, unsigned> getDecomposedSpellingLocSlowCase(const SrcMgr::SLocEntry *E, unsigned Offset) const; + void computeMacroArgsCache(SrcMgr::ContentCache *Content, FileID FID); }; diff --git a/include/clang/Lex/PreprocessorLexer.h b/include/clang/Lex/PreprocessorLexer.h index 91e097e90e..e2e30bf878 100644 --- a/include/clang/Lex/PreprocessorLexer.h +++ b/include/clang/Lex/PreprocessorLexer.h @@ -30,6 +30,9 @@ protected: /// The SourceManager FileID corresponding to the file being lexed. const FileID FID; + /// \brief Number of SLocEntries before lexing the file. + unsigned InitialNumSLocEntries; + //===--------------------------------------------------------------------===// // Context-specific lexing flags set by the preprocessor. //===--------------------------------------------------------------------===// @@ -67,12 +70,10 @@ protected: void operator=(const PreprocessorLexer&); // DO NOT IMPLEMENT friend class Preprocessor; - PreprocessorLexer(Preprocessor *pp, FileID fid) - : PP(pp), FID(fid), ParsingPreprocessorDirective(false), - ParsingFilename(false), LexingRawMode(false) {} + PreprocessorLexer(Preprocessor *pp, FileID fid); PreprocessorLexer() - : PP(0), + : PP(0), InitialNumSLocEntries(0), ParsingPreprocessorDirective(false), ParsingFilename(false), LexingRawMode(false) {} @@ -151,6 +152,11 @@ public: return FID; } + /// \brief Number of SLocEntries before lexing the file. + unsigned getInitialNumSLocEntries() const { + return InitialNumSLocEntries; + } + /// getFileEntry - Return the FileEntry corresponding to this FileID. 
Like /// getFileID(), this only works for lexers with attached preprocessors. const FileEntry *getFileEntry() const; diff --git a/lib/Basic/SourceManager.cpp b/lib/Basic/SourceManager.cpp index 2d8a47d089..79756bb8ef 100644 --- a/lib/Basic/SourceManager.cpp +++ b/lib/Basic/SourceManager.cpp @@ -17,6 +17,7 @@ #include "clang/Basic/FileManager.h" #include "llvm/ADT/StringSwitch.h" #include "llvm/ADT/Optional.h" +#include "llvm/ADT/STLExtras.h" #include "llvm/Support/Compiler.h" #include "llvm/Support/MemoryBuffer.h" #include "llvm/Support/raw_ostream.h" @@ -38,6 +39,7 @@ using llvm::MemoryBuffer; ContentCache::~ContentCache() { if (shouldFreeBuffer()) delete Buffer.getPointer(); + delete MacroArgsCache; } /// getSizeBytesMapped - Returns the number of bytes actually mapped for this @@ -1452,22 +1454,21 @@ SourceLocation SourceManager::translateFileLineCol(const FileEntry *SourceFile, return getLocForStartOfFile(FirstFID).getFileLocWithOffset(FilePos + Col - 1); } -/// \brief If \arg Loc points inside a function macro argument, the returned -/// location will be the macro location in which the argument was expanded. -/// If a macro argument is used multiple times, the expanded location will -/// be at the first expansion of the argument. -/// e.g. -/// MY_MACRO(foo); -/// ^ -/// Passing a file location pointing at 'foo', will yield a macro location -/// where 'foo' was expanded into. -SourceLocation SourceManager::getMacroArgExpandedLocation(SourceLocation Loc) { - if (Loc.isInvalid()) - return Loc; - - FileID FID = getFileID(Loc); - if (FID.isInvalid()) - return Loc; +/// \brief Compute a map of macro argument chunks to their expanded source +/// location. Chunks that are not part of a macro argument will map to an +/// invalid source location. e.g. 
if a file contains one macro argument at +/// offset 100 with length 10, this is how the map will be formed: +/// 0 -> SourceLocation() +/// 100 -> Expanded macro arg location +/// 110 -> SourceLocation() +void SourceManager::computeMacroArgsCache(ContentCache *Content, FileID FID) { + assert(!Content->MacroArgsCache); + assert(!FID.isInvalid()); + + Content->MacroArgsCache = new ContentCache::MacroArgsMap(); + ContentCache::MacroArgsMap &MacroArgsCache = *Content->MacroArgsCache; + // Initially no macro argument chunk is present. + MacroArgsCache.insert(std::make_pair(0, SourceLocation())); int ID = FID.ID; while (1) { @@ -1475,41 +1476,99 @@ SourceLocation SourceManager::getMacroArgExpandedLocation(SourceLocation Loc) { // Stop if there are no more FileIDs to check. if (ID > 0) { if (unsigned(ID) >= local_sloc_entry_size()) - return Loc; + return; } else if (ID == -1) { - return Loc; + return; } const SrcMgr::SLocEntry &Entry = getSLocEntryByID(ID); if (Entry.isFile()) { - if (Entry.getFile().getIncludeLoc().isValid() && - !isBeforeInTranslationUnit(Entry.getFile().getIncludeLoc(), Loc)) - return Loc; + SourceLocation IncludeLoc = Entry.getFile().getIncludeLoc(); + if (IncludeLoc.isInvalid()) + continue; + if (!isInFileID(IncludeLoc, FID)) + return; // No more files/macros that may be "contained" in this file. + + // Skip the files/macros of the #include'd file, we only care about macros + // that lexed macro arguments from our file. + if (Entry.getFile().NumCreatedFIDs) + ID += Entry.getFile().NumCreatedFIDs - 1/*because of next ++ID*/; continue; } - - if (isBeforeInTranslationUnit(Loc, - Entry.getExpansion().getExpansionLocStart())) - return Loc; + if (!Entry.getExpansion().isMacroArgExpansion()) continue; - - // This is a macro argument expansion. See if Loc points in the argument - // that was lexed. 
- - SourceLocation SpellLoc = Entry.getExpansion().getSpellingLoc(); - unsigned BeginOffs = SpellLoc.getOffset(); + + SourceLocation SpellLoc = + getSpellingLoc(Entry.getExpansion().getSpellingLoc()); + unsigned BeginOffs; + if (!isInFileID(SpellLoc, FID, &BeginOffs)) + return; // No more files/macros that may be "contained" in this file. unsigned EndOffs = BeginOffs + getFileIDSize(FileID::get(ID)); - if (BeginOffs <= Loc.getOffset() && Loc.getOffset() < EndOffs) { - SourceLocation ExpandLoc = SourceLocation::getMacroLoc(Entry.getOffset()); - // Replace current Loc with the expanded location and continue. - // The expanded argument may end up being passed to another function macro - // and relexed again. - Loc = ExpandLoc.getFileLocWithOffset(Loc.getOffset()-BeginOffs); - } + + // Add a new chunk for this macro argument. A previous macro argument chunk + // may have been lexed again, so e.g. if the map is + // 0 -> SourceLocation() + // 100 -> Expanded loc #1 + // 110 -> SourceLocation() + // and we found a new macro FileID that lexed from offset 105 with length 3, + // the new map will be: + // 0 -> SourceLocation() + // 100 -> Expanded loc #1 + // 105 -> Expanded loc #2 + // 108 -> Expanded loc #1 + // 110 -> SourceLocation() + // + // Since re-lexed macro chunks will always be the same size as or smaller than + // previous chunks, we only need to find where the ending of the new macro + // chunk is mapped to and update the map with new begin/end mappings. + + ContentCache::MacroArgsMap::iterator I= MacroArgsCache.upper_bound(EndOffs); + --I; + SourceLocation EndOffsMappedLoc = I->second; + MacroArgsCache[BeginOffs] = SourceLocation::getMacroLoc(Entry.getOffset()); + MacroArgsCache[EndOffs] = EndOffsMappedLoc; } } +/// \brief If \arg Loc points inside a function macro argument, the returned +/// location will be the macro location in which the argument was expanded. 
+/// If a macro argument is used multiple times, the expanded location will +/// be at the first expansion of the argument. +/// e.g. +/// MY_MACRO(foo); +/// ^ +/// Passing a file location pointing at 'foo', will yield a macro location +/// where 'foo' was expanded into. +SourceLocation SourceManager::getMacroArgExpandedLocation(SourceLocation Loc) { + if (Loc.isInvalid() || !Loc.isFileID()) + return Loc; + + FileID FID; + unsigned Offset; + llvm::tie(FID, Offset) = getDecomposedLoc(Loc); + if (FID.isInvalid()) + return Loc; + + ContentCache *Content + = const_cast<ContentCache *>(getSLocEntry(FID).getFile().getContentCache()); + if (!Content->MacroArgsCache) + computeMacroArgsCache(Content, FID); + + assert(Content->MacroArgsCache); + assert(!Content->MacroArgsCache->empty()); + ContentCache::MacroArgsMap::iterator + I = Content->MacroArgsCache->upper_bound(Offset); + --I; + + unsigned MacroArgBeginOffs = I->first; + SourceLocation MacroArgExpandedLoc = I->second; + if (MacroArgExpandedLoc.isValid()) + return MacroArgExpandedLoc.getFileLocWithOffset(Offset - MacroArgBeginOffs); + + return Loc; +} + /// Given a decomposed source location, move it up the include/expansion stack /// to the parent source location. If this is possible, return the decomposed /// version of the parent in Loc and return false. 
If Loc is the top-level @@ -1617,14 +1676,17 @@ void SourceManager::PrintStats() const { << "B of Sloc address space used.\n"; unsigned NumLineNumsComputed = 0; + unsigned NumMacroArgsComputed = 0; unsigned NumFileBytesMapped = 0; for (fileinfo_iterator I = fileinfo_begin(), E = fileinfo_end(); I != E; ++I){ NumLineNumsComputed += I->second->SourceLineCache != 0; + NumMacroArgsComputed += I->second->MacroArgsCache != 0; NumFileBytesMapped += I->second->getSizeBytesMapped(); } llvm::errs() << NumFileBytesMapped << " bytes of files mapped, " - << NumLineNumsComputed << " files with line #'s computed.\n"; + << NumLineNumsComputed << " files with line #'s computed, " + << NumMacroArgsComputed << " files with macro args computed.\n"; llvm::errs() << "FileID scans: " << NumLinearScans << " linear, " << NumBinaryProbes << " binary.\n"; } diff --git a/lib/Lex/PPLexerChange.cpp b/lib/Lex/PPLexerChange.cpp index bf28199b88..cb7b279358 100644 --- a/lib/Lex/PPLexerChange.cpp +++ b/lib/Lex/PPLexerChange.cpp @@ -204,6 +204,16 @@ bool Preprocessor::HandleEndOfFile(Token &Result, bool isEndOfMacro) { // If this is a #include'd file, pop it off the include stack and continue // lexing the #includer file. if (!IncludeMacroStack.empty()) { + if (!isEndOfMacro && CurPPLexer && + SourceMgr.getIncludeLoc(CurPPLexer->getFileID()).isValid()) { + // Notify SourceManager to record the number of FileIDs that were created + // during lexing of the #include'd file. + unsigned NumFIDs = + SourceMgr.local_sloc_entry_size() - + CurPPLexer->getInitialNumSLocEntries() + 1/*#include'd file*/; + SourceMgr.setNumCreatedFIDsForFileID(CurPPLexer->getFileID(), NumFIDs); + } + // We're done with the #included file. 
RemoveTopOfLexerStack(); diff --git a/lib/Lex/PreprocessorLexer.cpp b/lib/Lex/PreprocessorLexer.cpp index 808a81bd5e..0da9ef5531 100644 --- a/lib/Lex/PreprocessorLexer.cpp +++ b/lib/Lex/PreprocessorLexer.cpp @@ -17,6 +17,14 @@ #include "clang/Basic/SourceManager.h" using namespace clang; +PreprocessorLexer::PreprocessorLexer(Preprocessor *pp, FileID fid) + : PP(pp), FID(fid), InitialNumSLocEntries(0), + ParsingPreprocessorDirective(false), + ParsingFilename(false), LexingRawMode(false) { + if (pp) + InitialNumSLocEntries = pp->getSourceManager().local_sloc_entry_size(); +} + /// LexIncludeFilename - After the preprocessor has parsed a #include, lex and /// (potentially) macro expand the filename. void PreprocessorLexer::LexIncludeFilename(Token &FilenameTok) { diff --git a/lib/Serialization/ASTReader.cpp b/lib/Serialization/ASTReader.cpp index 768d5db7e3..7a6a6039f3 100644 --- a/lib/Serialization/ASTReader.cpp +++ b/lib/Serialization/ASTReader.cpp @@ -1320,9 +1320,11 @@ ASTReader::ASTReadResult ASTReader::ReadSLocEntryRecord(int ID) { FileID FID = SourceMgr.createFileID(File, IncludeLoc, (SrcMgr::CharacteristicKind)Record[2], ID, BaseOffset + Record[0]); + SrcMgr::FileInfo &FileInfo = + const_cast<SrcMgr::FileInfo&>(SourceMgr.getSLocEntry(FID).getFile()); + FileInfo.NumCreatedFIDs = Record[6]; if (Record[3]) - const_cast<SrcMgr::FileInfo&>(SourceMgr.getSLocEntry(FID).getFile()) - .setHasLineDirectives(); + FileInfo.setHasLineDirectives(); break; } diff --git a/lib/Serialization/ASTWriter.cpp b/lib/Serialization/ASTWriter.cpp index 15a7cbc628..acd05006c3 100644 --- a/lib/Serialization/ASTWriter.cpp +++ b/lib/Serialization/ASTWriter.cpp @@ -1243,6 +1243,7 @@ static unsigned CreateSLocFileAbbrev(llvm::BitstreamWriter &Stream) { // FileEntry fields. 
Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 12)); // Size Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 32)); // Modification time + Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8)); // NumCreatedFIDs Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Blob)); // File name return Stream.EmitAbbrev(Abbrev); } @@ -1515,6 +1516,8 @@ void ASTWriter::WriteSourceManagerBlock(SourceManager &SourceMgr, Record.push_back(Content->OrigEntry->getSize()); Record.push_back(Content->OrigEntry->getModificationTime()); + Record.push_back(File.NumCreatedFIDs); + // Turn the file name into an absolute path, if it isn't already. const char *Filename = Content->OrigEntry->getName(); llvm::SmallString<128> FilePath(Filename); -- 2.40.0