From: Anna Zaks Date: Thu, 12 Apr 2012 22:36:48 +0000 (+0000) Subject: [analyzer] PCH deserialization optimization. X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=6a86082f3a06a2dcceaaf63f78a0e52d64bcbaa3;p=clang [analyzer] PCH deserialization optimization. We should not deserialize unused declarations from the PCH file. Achieve this by storing the top level declarations during parsing (HandleTopLevelDecl ASTConsumer callback) and analyzing/building a call graph only for those. Tested the patch on a sample ObjC file that uses PCH. With the patch, the analyzes is 17.5% faster and clang consumes 40% less memory. Got about 10% overall build/analyzes time decrease on a large Objective C project. A bit of CallGraph refactoring/cleanup as well.. git-svn-id: https://llvm.org/svn/llvm-project/cfe/trunk@154625 91177308-0d34-0410-b5e6-96231b3b80d8 --- diff --git a/include/clang/Analysis/CallGraph.h b/include/clang/Analysis/CallGraph.h index f1b6e64772..9b6807343c 100644 --- a/include/clang/Analysis/CallGraph.h +++ b/include/clang/Analysis/CallGraph.h @@ -18,6 +18,7 @@ #define LLVM_CLANG_ANALYSIS_CALLGRAPH #include "clang/AST/DeclBase.h" +#include "clang/AST/RecursiveASTVisitor.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/GraphTraits.h" #include "llvm/ADT/SetVector.h" @@ -25,8 +26,14 @@ namespace clang { class CallGraphNode; -class CallGraph { +/// \class The AST-based call graph. +/// +/// The call graph extends itself with the given declarations by implementing +/// the recursive AST visitor, which constructs the graph by visiting the given +/// declarations. +class CallGraph : public RecursiveASTVisitor { friend class CallGraphNode; + typedef llvm::DenseMap FunctionMapTy; /// FunctionMap owns all CallGraphNodes. @@ -45,19 +52,23 @@ public: CallGraph(); ~CallGraph(); - /// \brief Add the given declaration to the call graph. - void addToCallGraph(Decl *D, bool IsGlobal); + /// \brief Populate the call graph with the functions in the given + /// declaration. + /// + /// Recursively walks the declaration to find all the dependent Decls as well. + void addToCallGraph(Decl *D) { + TraverseDecl(D); + } - /// \brief Populate the call graph with the functions in the given translation - /// unit. - void addToCallGraph(TranslationUnitDecl *TU); + /// \brief Determine if a declaration should be included in the graph. + static bool includeInGraph(const Decl *D); /// \brief Lookup the node for the given declaration. CallGraphNode *getNode(const Decl *) const; /// \brief Lookup the node for the given declaration. If none found, insert /// one into the graph. - CallGraphNode *getOrInsertFunction(Decl *); + CallGraphNode *getOrInsertNode(Decl *); /// Iterators through all the elements in the graph. Note, this gives /// non-deterministic order. @@ -90,6 +101,32 @@ public: void print(raw_ostream &os) const; void dump() const; void viewGraph() const; + + /// Part of recursive declaration visitation. + bool VisitFunctionDecl(FunctionDecl *FD) { + // We skip function template definitions, as their semantics is + // only determined when they are instantiated. + if (includeInGraph(FD)) + // If this function has external linkage, anything could call it. + // Note, we are not precise here. For example, the function could have + // its address taken. + addNodeForDecl(FD, FD->isGlobal()); + return true; + } + + /// Part of recursive declaration visitation. + bool VisitObjCMethodDecl(ObjCMethodDecl *MD) { + if (includeInGraph(MD)) + addNodeForDecl(MD, true); + return true; + } + +private: + /// \brief Add the given declaration to the call graph. + void addNodeForDecl(Decl *D, bool IsGlobal); + + /// \brief Allocate a new node in the graph. + CallGraphNode *allocateNewNode(Decl *); }; class CallGraphNode { diff --git a/include/clang/StaticAnalyzer/Core/PathSensitive/CoreEngine.h b/include/clang/StaticAnalyzer/Core/PathSensitive/CoreEngine.h index d9156441db..59136fc314 100644 --- a/include/clang/StaticAnalyzer/Core/PathSensitive/CoreEngine.h +++ b/include/clang/StaticAnalyzer/Core/PathSensitive/CoreEngine.h @@ -82,7 +82,7 @@ private: /// The functions which have been analyzed through inlining. This is owned by /// AnalysisConsumer. It can be null. - SetOfDecls *AnalyzedCallees; + SetOfConstDecls *AnalyzedCallees; /// The information about functions shared by the whole translation unit. /// (This data is owned by AnalysisConsumer.) @@ -109,7 +109,7 @@ private: public: /// Construct a CoreEngine object to analyze the provided CFG using /// a DFS exploration of the exploded graph. - CoreEngine(SubEngine& subengine, SetOfDecls *VisitedCallees, + CoreEngine(SubEngine& subengine, SetOfConstDecls *VisitedCallees, FunctionSummariesTy *FS) : SubEng(subengine), G(new ExplodedGraph()), WList(WorkList::makeBFS()), diff --git a/include/clang/StaticAnalyzer/Core/PathSensitive/ExprEngine.h b/include/clang/StaticAnalyzer/Core/PathSensitive/ExprEngine.h index 7b31172e33..2a21a03e9a 100644 --- a/include/clang/StaticAnalyzer/Core/PathSensitive/ExprEngine.h +++ b/include/clang/StaticAnalyzer/Core/PathSensitive/ExprEngine.h @@ -91,7 +91,8 @@ class ExprEngine : public SubEngine { GRBugReporter BR; public: - ExprEngine(AnalysisManager &mgr, bool gcEnabled, SetOfDecls *VisitedCallees, + ExprEngine(AnalysisManager &mgr, bool gcEnabled, + SetOfConstDecls *VisitedCallees, FunctionSummariesTy *FS); ~ExprEngine(); diff --git a/include/clang/StaticAnalyzer/Core/PathSensitive/FunctionSummary.h b/include/clang/StaticAnalyzer/Core/PathSensitive/FunctionSummary.h index 33943d9f1c..42adff3e2b 100644 --- a/include/clang/StaticAnalyzer/Core/PathSensitive/FunctionSummary.h +++ b/include/clang/StaticAnalyzer/Core/PathSensitive/FunctionSummary.h @@ -21,7 +21,8 @@ namespace clang { namespace ento { -typedef llvm::SmallPtrSet SetOfDecls; +typedef llvm::SmallPtrSet SetOfDecls; +typedef llvm::SmallPtrSet SetOfConstDecls; class FunctionSummariesTy { struct FunctionSummary { diff --git a/lib/Analysis/CallGraph.cpp b/lib/Analysis/CallGraph.cpp index 01d6c41f91..96a16c3afe 100644 --- a/lib/Analysis/CallGraph.cpp +++ b/lib/Analysis/CallGraph.cpp @@ -14,35 +14,12 @@ #include "clang/AST/ASTContext.h" #include "clang/AST/Decl.h" -#include "clang/AST/RecursiveASTVisitor.h" #include "clang/AST/StmtVisitor.h" #include "llvm/Support/GraphWriter.h" using namespace clang; -/// Determine if a declaration should be included in the graph. -static bool includeInGraph(const Decl *D) { - if (const FunctionDecl *FD = dyn_cast(D)) { - // We skip function template definitions, as their semantics is - // only determined when they are instantiated. - if (!FD->isThisDeclarationADefinition() || - FD->isDependentContext()) - return false; - - IdentifierInfo *II = FD->getIdentifier(); - if (II && II->getName().startswith("__inline")) - return false; - } - - if (const ObjCMethodDecl *ID = dyn_cast(D)) { - if (!ID->isThisDeclarationADefinition()) - return false; - } - - return true; -} - namespace { /// A helper class, which walks the AST and locates all the call sites in the /// given function body. @@ -60,8 +37,8 @@ public: void VisitCallExpr(CallExpr *CE) { // TODO: We need to handle ObjC method calls as well. if (FunctionDecl *CalleeDecl = CE->getDirectCallee()) - if (includeInGraph(CalleeDecl)) { - CallGraphNode *CalleeNode = G->getOrInsertFunction(CalleeDecl); + if (G->includeInGraph(CalleeDecl)) { + CallGraphNode *CalleeNode = G->getOrInsertNode(CalleeDecl); CallerNode->addCallee(CalleeNode, G); } } @@ -73,38 +50,10 @@ public: } }; -/// A helper class which walks the AST declarations. -// TODO: We might want to specialize the visitor to shrink the call graph. -// For example, we might not want to include the inline methods from header -// files. -class CGDeclVisitor : public RecursiveASTVisitor { - CallGraph *CG; - -public: - CGDeclVisitor(CallGraph * InCG) : CG(InCG) {} - - bool VisitFunctionDecl(FunctionDecl *FD) { - // We skip function template definitions, as their semantics is - // only determined when they are instantiated. - if (includeInGraph(FD)) - // If this function has external linkage, anything could call it. - // Note, we are not precise here. For example, the function could have - // its address taken. - CG->addToCallGraph(FD, FD->isGlobal()); - return true; - } - - bool VisitObjCMethodDecl(ObjCMethodDecl *MD) { - if (includeInGraph(MD)) - CG->addToCallGraph(MD, true); - return true; - } -}; - } // end anonymous namespace CallGraph::CallGraph() { - Root = getOrInsertFunction(0); + Root = getOrInsertNode(0); } CallGraph::~CallGraph() { @@ -116,10 +65,36 @@ CallGraph::~CallGraph() { } } -void CallGraph::addToCallGraph(Decl* D, bool IsGlobal) { +bool CallGraph::includeInGraph(const Decl *D) { + if (const FunctionDecl *FD = dyn_cast(D)) { + // We skip function template definitions, as their semantics is + // only determined when they are instantiated. + if (!FD->isThisDeclarationADefinition() || + FD->isDependentContext()) + return false; + + IdentifierInfo *II = FD->getIdentifier(); + if (II && II->getName().startswith("__inline")) + return false; + } + + if (const ObjCMethodDecl *ID = dyn_cast(D)) { + if (!ID->isThisDeclarationADefinition()) + return false; + } + + return true; +} + +void CallGraph::addNodeForDecl(Decl* D, bool IsGlobal) { assert(D); - CallGraphNode *Node = getOrInsertFunction(D); + // Do nothing if the node already exists. + if (FunctionMap.find(D) != FunctionMap.end()) + return; + + // Allocate a new node, mark it as root, and process it's calls. + CallGraphNode *Node = getOrInsertNode(D); if (IsGlobal) Root->addCallee(Node, this); @@ -129,17 +104,13 @@ void CallGraph::addToCallGraph(Decl* D, bool IsGlobal) { builder.Visit(Body); } -void CallGraph::addToCallGraph(TranslationUnitDecl *TU) { - CGDeclVisitor(this).TraverseDecl(TU); -} - CallGraphNode *CallGraph::getNode(const Decl *F) const { FunctionMapTy::const_iterator I = FunctionMap.find(F); if (I == FunctionMap.end()) return 0; return I->second; } -CallGraphNode *CallGraph::getOrInsertFunction(Decl *F) { +CallGraphNode *CallGraph::getOrInsertNode(Decl *F) { CallGraphNode *&Node = FunctionMap[F]; if (Node) return Node; diff --git a/lib/StaticAnalyzer/Core/ExprEngine.cpp b/lib/StaticAnalyzer/Core/ExprEngine.cpp index 30a511d686..d2da9aaccb 100644 --- a/lib/StaticAnalyzer/Core/ExprEngine.cpp +++ b/lib/StaticAnalyzer/Core/ExprEngine.cpp @@ -67,7 +67,7 @@ static inline Selector GetNullarySelector(const char* name, ASTContext &Ctx) { //===----------------------------------------------------------------------===// ExprEngine::ExprEngine(AnalysisManager &mgr, bool gcEnabled, - SetOfDecls *VisitedCallees, + SetOfConstDecls *VisitedCallees, FunctionSummariesTy *FS) : AMgr(mgr), AnalysisDeclContexts(mgr.getAnalysisDeclContextManager()), diff --git a/lib/StaticAnalyzer/Frontend/AnalysisConsumer.cpp b/lib/StaticAnalyzer/Frontend/AnalysisConsumer.cpp index 756ef32615..c19ebcbcc4 100644 --- a/lib/StaticAnalyzer/Frontend/AnalysisConsumer.cpp +++ b/lib/StaticAnalyzer/Frontend/AnalysisConsumer.cpp @@ -41,6 +41,7 @@ #include "llvm/Support/Timer.h" #include "llvm/ADT/DepthFirstIterator.h" #include "llvm/ADT/OwningPtr.h" +#include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/Statistic.h" #include @@ -95,6 +96,13 @@ public: AnalyzerOptions Opts; ArrayRef Plugins; + /// \brief Stores the declarations from the local translation unit. + /// Note, we pre-compute the local declarations at parse time as an + /// optimization to make sure we do not deserialize everything from disk. + /// The local declaration to all declarations ratio might be very small when + /// working with a PCH file. + SetOfDecls LocalTUDecls; + // PD is owned by AnalysisManager. PathDiagnosticConsumer *PD; @@ -214,11 +222,16 @@ public: Opts.NoRetryExhausted)); } + /// \brief Store the top level decls in the set to be processed later on. + /// (Doing this pre-processing avoids deserialization of data from PCH.) + virtual bool HandleTopLevelDecl(DeclGroupRef D); + virtual void HandleTopLevelDeclInObjCContainer(DeclGroupRef D); + virtual void HandleTranslationUnit(ASTContext &C); - /// \brief Build the call graph for the TU and use it to define the order - /// in which the functions should be visited. - void HandleDeclsGallGraph(TranslationUnitDecl *TU); + /// \brief Build the call graph for all the top level decls of this TU and + /// use it to define the order in which the functions should be visited. + void HandleDeclsGallGraph(); /// \brief Run analyzes(syntax or path sensitive) on the given function. /// \param Mode - determines if we are requesting syntax only or path @@ -226,13 +239,12 @@ public: /// \param VisitedCallees - The output parameter, which is populated with the /// set of functions which should be considered analyzed after analyzing the /// given root function. - void HandleCode(Decl *D, AnalysisMode Mode, SetOfDecls *VisitedCallees = 0); - - /// \brief Check if we should skip (not analyze) the given function. - bool skipFunction(Decl *D); + void HandleCode(Decl *D, AnalysisMode Mode, + SetOfConstDecls *VisitedCallees = 0); - void RunPathSensitiveChecks(Decl *D, SetOfDecls *VisitedCallees); - void ActionExprEngine(Decl *D, bool ObjCGCEnabled, SetOfDecls *VisitedCallees); + void RunPathSensitiveChecks(Decl *D, SetOfConstDecls *VisitedCallees); + void ActionExprEngine(Decl *D, bool ObjCGCEnabled, + SetOfConstDecls *VisitedCallees); /// Visitors for the RecursiveASTVisitor. @@ -262,6 +274,13 @@ public: HandleCode(MD, RecVisitorMode); return true; } + +private: + void storeTopLevelDecls(DeclGroupRef DG); + + /// \brief Check if we should skip (not analyze) the given function. + bool skipFunction(Decl *D); + }; } // end anonymous namespace @@ -271,11 +290,35 @@ public: //===----------------------------------------------------------------------===// llvm::Timer* AnalysisConsumer::TUTotalTimer = 0; -void AnalysisConsumer::HandleDeclsGallGraph(TranslationUnitDecl *TU) { +bool AnalysisConsumer::HandleTopLevelDecl(DeclGroupRef DG) { + storeTopLevelDecls(DG); + return true; +} + +void AnalysisConsumer::HandleTopLevelDeclInObjCContainer(DeclGroupRef DG) { + storeTopLevelDecls(DG); +} + +void AnalysisConsumer::storeTopLevelDecls(DeclGroupRef DG) { + for (DeclGroupRef::iterator I = DG.begin(), E = DG.end(); I != E; ++I) { + + // Skip ObjCMethodDecl, wait for the objc container to avoid + // analyzing twice. + if (isa(*I)) + continue; + + LocalTUDecls.insert(*I); + } +} + +void AnalysisConsumer::HandleDeclsGallGraph() { // Otherwise, use the Callgraph to derive the order. // Build the Call Graph. CallGraph CG; - CG.addToCallGraph(TU); + // Add all the top level declarations to the graph. + for (SetOfDecls::iterator I = LocalTUDecls.begin(), + E = LocalTUDecls.end(); I != E; ++I) + CG.addToCallGraph(*I); // Find the top level nodes - children of root + the unreachable (parentless) // nodes. @@ -316,15 +359,15 @@ void AnalysisConsumer::HandleDeclsGallGraph(TranslationUnitDecl *TU) { continue; // Analyze the function. - SetOfDecls VisitedCallees; + SetOfConstDecls VisitedCallees; Decl *D = N->getDecl(); assert(D); HandleCode(D, ANALYSIS_PATH, (Mgr->InliningMode == All ? 0 : &VisitedCallees)); // Add the visited callees to the global visited set. - for (SetOfDecls::const_iterator I = VisitedCallees.begin(), - E = VisitedCallees.end(); I != E; ++I) { + for (SetOfConstDecls::const_iterator I = VisitedCallees.begin(), + E = VisitedCallees.end(); I != E; ++I){ CallGraphNode *VN = CG.getNode(*I); if (VN) Visited.insert(VN); @@ -358,10 +401,14 @@ void AnalysisConsumer::HandleTranslationUnit(ASTContext &C) { // sensitive analyzes as well. RecVisitorMode = (Mgr->shouldInlineCall() ? ANALYSIS_SYNTAX : ANALYSIS_ALL); RecVisitorBR = &BR; - TraverseDecl(TU); + + // Process all the top level declarations. + for (SetOfDecls::iterator I = LocalTUDecls.begin(), + E = LocalTUDecls.end(); I != E; ++I) + TraverseDecl(*I); if (Mgr->shouldInlineCall()) - HandleDeclsGallGraph(TU); + HandleDeclsGallGraph(); // After all decls handled, run checkers on the entire TranslationUnit. checkerMgr->runCheckersOnEndOfTranslationUnit(TU, *Mgr, BR); @@ -424,7 +471,7 @@ bool AnalysisConsumer::skipFunction(Decl *D) { } void AnalysisConsumer::HandleCode(Decl *D, AnalysisMode Mode, - SetOfDecls *VisitedCallees) { + SetOfConstDecls *VisitedCallees) { if (skipFunction(D)) return; @@ -458,7 +505,7 @@ void AnalysisConsumer::HandleCode(Decl *D, AnalysisMode Mode, //===----------------------------------------------------------------------===// void AnalysisConsumer::ActionExprEngine(Decl *D, bool ObjCGCEnabled, - SetOfDecls *VisitedCallees) { + SetOfConstDecls *VisitedCallees) { // Construct the analysis engine. First check if the CFG is valid. // FIXME: Inter-procedural analysis will need to handle invalid CFGs. if (!Mgr->getCFG(D)) @@ -489,7 +536,8 @@ void AnalysisConsumer::ActionExprEngine(Decl *D, bool ObjCGCEnabled, Eng.getBugReporter().FlushReports(); } -void AnalysisConsumer::RunPathSensitiveChecks(Decl *D, SetOfDecls *Visited) { +void AnalysisConsumer::RunPathSensitiveChecks(Decl *D, + SetOfConstDecls *Visited) { switch (Mgr->getLangOpts().getGC()) { case LangOptions::NonGC: diff --git a/test/Analysis/check-deserialization.cpp b/test/Analysis/check-deserialization.cpp new file mode 100644 index 0000000000..2b0bce2b9a --- /dev/null +++ b/test/Analysis/check-deserialization.cpp @@ -0,0 +1,20 @@ +// RUN: %clang_cc1 -emit-pch -o %t %s +// RUN: %clang_cc1 -error-on-deserialized-decl S1_method -include-pch %t -analyze -analyzer-checker=core %s +// RUN: %clang_cc1 -include-pch %t -analyze -analyzer-checker=core -verify %s + +#ifndef HEADER +#define HEADER +// Header. + +void S1_method(); // This should not be deserialized. + + +#else +// Using the header. + +int test() { + int x = 0; + return 5/x; //expected-warning {{Division by zero}} +} + +#endif diff --git a/test/Analysis/objc-method-coverage.m b/test/Analysis/objc-method-coverage.m new file mode 100644 index 0000000000..056aafe518 --- /dev/null +++ b/test/Analysis/objc-method-coverage.m @@ -0,0 +1,17 @@ +// RUN: %clang_cc1 -analyze -analyzer-checker=core -analyzer-ipa=inlining -analyzer-stats -fblocks %s 2>&1 | FileCheck %s +// RUN: %clang_cc1 -analyze -analyzer-checker=core -analyzer-ipa=none -analyzer-stats -fblocks %s 2>&1 | FileCheck %s + +@interface I +int f() { + return 0; +} +@end + +@implementation I ++ (void *)ff{ + return (void*)0; +} +@end + +// CHECK: ... Statistics Collected ... +// CHECK: 2 AnalysisConsumer - The # of functions analysed (as top level). \ No newline at end of file