From fddd51853f8ccaa1df2476376e6fd74d2f315c73 Mon Sep 17 00:00:00 2001 From: Ted Kremenek Date: Tue, 21 Aug 2007 21:42:03 +0000 Subject: [PATCH] Added CFG infrastructure (CFG.cpp and CFG.h) for clang ASTs. Added builder code to translate ASTs to CFGs. This currently supports if, return, and non-control flow statements. Added pretty-printer to debug CFGs. Added a "-dump-cfg" option to the clang driver to dump CFGs for code sent through the frontend. git-svn-id: https://llvm.org/svn/llvm-project/cfe/trunk@41252 91177308-0d34-0410-b5e6-96231b3b80d8 --- AST/CFG.cpp | 351 ++++++++++++++++++++++++++++++++++++++++ Driver/ASTStreamers.cpp | 27 ++++ Driver/ASTStreamers.h | 1 + Driver/clang.cpp | 6 + include/clang/AST/CFG.h | 196 ++++++++++++++++++++++ 5 files changed, 581 insertions(+) create mode 100644 AST/CFG.cpp create mode 100644 include/clang/AST/CFG.h diff --git a/AST/CFG.cpp b/AST/CFG.cpp new file mode 100644 index 0000000000..73d2408292 --- /dev/null +++ b/AST/CFG.cpp @@ -0,0 +1,351 @@ +//===--- CFG.cpp - Classes for representing and building CFGs----*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file was developed by Ted Kremenek and is distributed under +// the University of Illinois Open Source License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines the CFG and CFGBuilder classes for representing and +// building Control-Flow Graphs (CFGs) from ASTs. +// +//===----------------------------------------------------------------------===// + +#include "clang/AST/CFG.h" +#include "clang/AST/Expr.h" +#include +#include +#include +using namespace clang; + +namespace { + + // SaveAndRestore - A utility class that uses RIIA to save and restore + // the value of a variable. + template + struct SaveAndRestore { + SaveAndRestore(T& x) : X(x), old_value(x) {} + ~SaveAndRestore() { X = old_value; } + + T& X; + T old_value; + }; +} + +/// CFGBuilder - This class is implements CFG construction from an AST. +/// The builder is stateful: an instance of the builder should be used to only +/// construct a single CFG. +/// +/// Example usage: +/// +/// CFGBuilder builder; +/// CFG* cfg = builder.BuildAST(stmt1); +/// +class CFGBuilder { + CFG* cfg; + CFGBlock* Block; + CFGBlock* Exit; + CFGBlock* Succ; + unsigned NumBlocks; + +public: + explicit CFGBuilder() : cfg(NULL), Block(NULL), Exit(NULL), Succ(NULL), + NumBlocks(0) { + // Create an empty CFG. + cfg = new CFG(); + } + + ~CFGBuilder() { delete cfg; } + + /// buildCFG - Constructs a CFG from an AST (a Stmt*). The AST can + /// represent an arbitrary statement. Examples include a single expression + /// or a function body (compound statement). The ownership of the returned + /// CFG is transferred to the caller. If CFG construction fails, this method + /// returns NULL. + CFG* buildCFG(Stmt* Statement) { + if (!Statement) return NULL; + + assert (cfg && "CFGBuilder should only be used to construct one CFG"); + + // Create the exit block. + Block = createBlock(); + Exit = Block; + + // Visit the statements and create the CFG. + if (CFGBlock* B = visitStmt(Statement)) { + // Reverse the statements in the last constructed block. Statements + // are inserted into the blocks in reverse order. + B->reverseStmts(); + // NULL out cfg so that repeated calls + CFG* t = cfg; + cfg = NULL; + return t; + } + else { + // Error occured while building CFG: Delete the partially constructed CFG. + delete cfg; + cfg = NULL; + return NULL; + } + } + +private: + + // createBlock - Used to lazily create blocks that are connected + // to the current (global) succcessor. + CFGBlock* createBlock( bool add_successor = true ) { + CFGBlock* B = cfg->createBlock(NumBlocks++); + if (add_successor && Succ) B->addSuccessor(Succ); + return B; + } + + // visitStmt - CFG construction is done via a recursive walk of an AST. + // We actually parse the AST in reverse order so that the successor + // of a basic block is constructed prior to its predecessor. This + // allows us to nicely capture implicit fall-throughs without extra + // basic blocks. + // + // The value returned from this function is the last created CFGBlock + // that represents the "entry" point for the translated AST node. + CFGBlock* visitStmt(Stmt* Statement) { + assert (Statement && "visitStmt does not accept NULL Stmt*"); + + switch (Statement->getStmtClass()) { + default: + assert (false && "statement case for CFGBuilder not yet implemented"); + return NULL; + + // Statements with no branching control flow. + case Stmt::NullStmtClass: + case Stmt::DeclStmtClass: + case Stmt::PreDefinedExprClass: + case Stmt::DeclRefExprClass: + case Stmt::IntegerLiteralClass: + case Stmt::FloatingLiteralClass: + case Stmt::StringLiteralClass: + case Stmt::CharacterLiteralClass: + case Stmt::ParenExprClass: + case Stmt::UnaryOperatorClass: + case Stmt::SizeOfAlignOfTypeExprClass: + case Stmt::ArraySubscriptExprClass: + case Stmt::CallExprClass: + case Stmt::BinaryOperatorClass: + case Stmt::ImplicitCastExprClass: + case Stmt::CompoundLiteralExprClass: + case Stmt::OCUVectorElementExprClass: + // We cannot assume that we are in the middle of a basic block, since + // the CFG might only be constructed for this single statement. If + // we have no current basic block, just create one lazily. + if (!Block) Block = createBlock(); + + // Simply add the statement to the current block. We actually + // insert statements in reverse order; this order is reversed later + // when processing the containing element in the AST. + Block->appendStmt(Statement); + break; + + case Stmt::CompoundStmtClass: { + // Iterate through the statements of the compound statement in reverse + // order. Because this statement may contain statements that have + // complicated control flow, the value of "Block" may change at any + // time. This means that statements in the compound statement will + // automatically be distributed across multiple basic blocks when + // necessary. + CompoundStmt* C = cast(Statement); + + for (CompoundStmt::reverse_body_iterator I = C->body_rbegin(), + E = C->body_rend(); I != E; ++I ) + // Add the statement to the current block. + if (!visitStmt(*I)) return NULL; + + break; + } + + case Stmt::IfStmtClass: { + IfStmt* I = cast(Statement); + + // We may see an if statement in the middle of a basic block, or + // it may be the first statement we are processing. In either case, + // we create a new basic block. First, we create the blocks for + // the then...else statements, and then we create the block containing + // the if statement. If we were in the middle of a block, we + // stop processing that block and reverse its statements. That block + // is then the implicit successor for the "then" and "else" clauses. + + // The block we were proccessing is now finished. Make it the + // successor block. + if (Block) { + Succ = Block; + Block->reverseStmts(); + } + + // Process the false branch. NULL out Block so that the recursive + // call to visitStmt will create a new basic block. + // Null out Block so that all successor + CFGBlock* ElseBlock = Succ; + + if (Stmt* Else = I->getElse()) { + SaveAndRestore sv(Succ); + + // NULL out Block so that the recursive call to visitStmt will + // create a new basic block. + Block = NULL; + ElseBlock = visitStmt(Else); + if (!ElseBlock) return NULL; + ElseBlock->reverseStmts(); + } + + // Process the true branch. NULL out Block so that the recursive + // call to visitStmt will create a new basic block. + // Null out Block so that all successor + CFGBlock* ThenBlock; + { + Stmt* Then = I->getThen(); + assert (Then); + SaveAndRestore sv(Succ); + Block = NULL; + ThenBlock = visitStmt(Then); + if (!ThenBlock) return NULL; + ThenBlock->reverseStmts(); + } + + // Now create a new block containing the if statement. + Block = createBlock(false); + + // Add the condition as the last statement in the new block. + Block->appendStmt(I->getCond()); + + // Set the terminator of the new block to the If statement. + Block->setTerminator(I); + + // Now add the successors. + Block->addSuccessor(ThenBlock); + Block->addSuccessor(ElseBlock); + + break; + } + + case Stmt::ReturnStmtClass: { + ReturnStmt* R = cast(Statement); + + // If we were in the middle of a block we stop processing that block + // and reverse its statements. + // + // NOTE: If a "return" appears in the middle of a block, this means + // that the code afterwards is DEAD (unreachable). We still + // keep a basic block for that code; a simple "mark-and-sweep" + // from the entry block will be able to report such dead + // blocks. + if (Block) Block->reverseStmts(); + + // Create the new block. + Block = createBlock(false); + + // The Exit block is the only successor. + Block->addSuccessor(Exit); + + // Add the return expression to the block. + Block->appendStmt(R); + + // Add the return statement itself to the block. + if (R->getRetValue()) Block->appendStmt(R->getRetValue()); + + break; + } + } // end dispatch on statement class + + return Block; + } + +}; + +// BuildCFG - A helper function that builds CFGs from ASTS. +CFG* CFG::BuildCFG( Stmt* Statement ) { + CFGBuilder Builder; + return Builder.buildCFG(Statement); +} + +// reverseStmts - A method that reverses the order of the statements within +// a CFGBlock. +void CFGBlock::reverseStmts() { std::reverse(Stmts.begin(),Stmts.end()); } + +// dump - A simple pretty printer of a CFG that outputs to stderr. +void CFG::dump() { print(std::cerr); } + +// print - A simple pretty printer of a CFG that outputs to an ostream. +void CFG::print(std::ostream& OS) { + // Iterate through the CFGBlocks and print them one by one. Specially + // designate the Entry and Exit blocks. + for (iterator I = Blocks.begin(), E = Blocks.end() ; I != E ; ++I) { + OS << "\n [ B" << I->getBlockID(); + if (&(*I) == getExit()) OS << " (EXIT) ]\n"; + else if (&(*I) == getEntry()) OS << " (ENTRY) ]\n"; + else OS << " ]\n"; + I->print(OS); + } + OS << "\n"; +} + +// dump - A simply pretty printer of a CFGBlock that outputs to stderr. +void CFGBlock::dump() { print(std::cerr); } + +// print - A simple pretty printer of a CFGBlock that outputs to an ostream. +// Generally this will only be called from CFG::print. +void CFGBlock::print(std::ostream& OS) { + + // Iterate through the statements in the block and print them. + OS << " ------------------------\n"; + unsigned j = 1; + for (iterator I = Stmts.begin(), E = Stmts.end() ; I != E ; ++I, ++j ) { + OS << " " << std::setw(3) << j << ": "; + (*I)->printPretty(OS); + if (isa(*I)) OS << '\n'; + } + OS << " ------------------------\n"; + + // Print the predecessors of this block. + OS << " Predecessors (" << pred_size() << "):"; + unsigned i = 0; + for (pred_iterator I = pred_begin(), E = pred_end(); I != E; ++I, ++i ) { + if (i == 8 || (i-8) == 0) { + OS << "\n "; + } + OS << " B" << (*I)->getBlockID(); + } + + // Print the terminator of this block. + OS << "\n Terminator: "; + if (ControlFlowStmt) { + switch (ControlFlowStmt->getStmtClass()) { + case Stmt::IfStmtClass: { + IfStmt* I = cast(ControlFlowStmt); + OS << "if "; + I->getCond()->printPretty(std::cerr); + OS << "\n"; + break; + } + + case Stmt::ReturnStmtClass: { + ReturnStmt* R = cast(ControlFlowStmt); + R->printPretty(std::cerr); + break; + } + + default: + assert(false && "terminator print not fully implemented"); + } + } + else OS << "\n"; + + // Print the successors of this block. + OS << " Successors (" << succ_size() << "):"; + i = 0; + for (succ_iterator I = succ_begin(), E = succ_end(); I != E; ++I, ++i ) { + if (i == 8 || (i-8) % 10 == 0) { + OS << "\n "; + } + OS << " B" << (*I)->getBlockID(); + } + OS << '\n'; +} \ No newline at end of file diff --git a/Driver/ASTStreamers.cpp b/Driver/ASTStreamers.cpp index 7f67cd405d..170c593afa 100644 --- a/Driver/ASTStreamers.cpp +++ b/Driver/ASTStreamers.cpp @@ -13,6 +13,7 @@ #include "ASTStreamers.h" #include "clang/AST/AST.h" +#include "clang/AST/CFG.h" #include "clang/Lex/Preprocessor.h" #include "clang/Sema/ASTStreamer.h" using namespace clang; @@ -142,4 +143,30 @@ void clang::DumpASTs(Preprocessor &PP, unsigned MainFileID, bool Stats) { ASTStreamer_Terminate(Streamer); } +void clang::DumpCFGs(Preprocessor &PP, unsigned MainFileID, bool Stats) { + ASTContext Context(PP.getTargetInfo(), PP.getIdentifierTable()); + ASTStreamerTy *Streamer = ASTStreamer_Init(PP, Context, MainFileID); + + while (Decl *D = ASTStreamer_ReadTopLevelDecl(Streamer)) { + if (FunctionDecl *FD = dyn_cast(D)) { + if (FD->getBody()) { + PrintFunctionDeclStart(FD); + fprintf(stderr,"\n"); + if (CFG* C = CFG::BuildCFG(FD->getBody())) + C->dump(); + else + fprintf(stderr," Error processing CFG.\n"); + } + } + } + + if (Stats) { + fprintf(stderr, "\nSTATISTICS:\n"); + ASTStreamer_PrintStats(Streamer); + Context.PrintStats(); + } + + ASTStreamer_Terminate(Streamer); +} + diff --git a/Driver/ASTStreamers.h b/Driver/ASTStreamers.h index 93749a46e7..6e90e2f683 100644 --- a/Driver/ASTStreamers.h +++ b/Driver/ASTStreamers.h @@ -23,6 +23,7 @@ class TypedefDecl; void BuildASTs(Preprocessor &PP, unsigned MainFileID, bool Stats); void PrintASTs(Preprocessor &PP, unsigned MainFileID, bool Stats); void DumpASTs(Preprocessor &PP, unsigned MainFileID, bool Stats); +void DumpCFGs(Preprocessor &PP, unsigned MainFileID, bool Stats); } // end clang namespace diff --git a/Driver/clang.cpp b/Driver/clang.cpp index 2ee8a93a30..d284434bed 100644 --- a/Driver/clang.cpp +++ b/Driver/clang.cpp @@ -52,6 +52,7 @@ enum ProgActions { ParseASTDump, // Parse ASTs and dump them. ParseASTCheck, // Parse ASTs and check diagnostics. ParseAST, // Parse ASTs. + ParseCFGDump, // Parse ASTS. Build CFGs. Print CFGs. ParsePrintCallbacks, // Parse and print each callback. ParseSyntaxOnly, // Parse and perform semantic analysis. ParseNoop, // Parse with noop callbacks. @@ -84,6 +85,8 @@ ProgAction(llvm::cl::desc("Choose output type:"), llvm::cl::ZeroOrMore, "Run parser, build ASTs, then dump them"), clEnumValN(ParseASTCheck, "parse-ast-check", "Run parser, build ASTs, then check diagnostics"), + clEnumValN(ParseCFGDump, "dump-cfg", + "Run parser, build ASTs, then build and print CFGs."), clEnumValN(EmitLLVM, "emit-llvm", "Build ASTs then convert to LLVM, emit .ll file"), clEnumValEnd)); @@ -825,6 +828,9 @@ static void ProcessInputFile(Preprocessor &PP, unsigned MainFileID, case ParseASTDump: DumpASTs(PP, MainFileID, Stats); break; + case ParseCFGDump: + DumpCFGs(PP, MainFileID, Stats); + break; case EmitLLVM: EmitLLVMFromASTs(PP, MainFileID, Stats); break; diff --git a/include/clang/AST/CFG.h b/include/clang/AST/CFG.h new file mode 100644 index 0000000000..503930979d --- /dev/null +++ b/include/clang/AST/CFG.h @@ -0,0 +1,196 @@ +//===--- CFG.h - Classes for representing and building CFGs------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file was developed by Ted Kremenek and is distributed under +// the University of Illinois Open Source License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines the CFG and CFGBuilder classes for representing and +// building Control-Flow Graphs (CFGs) from ASTs. +// +//===----------------------------------------------------------------------===// + +#include +#include +#include + +namespace clang { + +class Stmt; + +/// CFGBlock - Represents a single basic block in a source-level CFG. +/// It consists of: +/// +/// (1) A set of statements/expressions (which may contain subexpressions). +/// (2) A "terminator" statement (not in the set of statements). +/// (3) A list of successors and predecessors. +/// +/// Terminator: The terminator represents the type of control-flow that occurs +/// at the end of the basic block. The terminator is a Stmt* referring to an +/// AST node that has control-flow: if-statements, breaks, loops, etc. +/// If the control-flow is conditional, the condition expression will appear +/// within the set of statements in the block (usually the last statement). +/// +/// Predecessors: the order in the set of predecessors is arbitrary. +/// +/// Successors: the order in the set of successors is NOT arbitrary. We +/// currently have the following orderings based on the terminator: +/// +/// Terminator Successor Ordering +/// ----------------------------------------------------- +/// if Then Block; Else Block +/// +class CFGBlock { + typedef std::vector StatementListTy; + /// Stmts - The set of statements in the basic block. + StatementListTy Stmts; + /// ControlFlowStmt - The terminator for a basic block that + /// indicates the type of control-flow that occurs between a block + /// and its successors. + Stmt* ControlFlowStmt; + /// BlockID - A numerical ID assigned to a CFGBlock during construction + /// of the CFG. + unsigned BlockID; + + /// Predecessors/Successors - Keep track of the predecessor / successor + /// CFG blocks. + typedef std::vector AdjacentBlocks; + AdjacentBlocks Preds; + AdjacentBlocks Succs; + +public: + explicit CFGBlock(unsigned blockid) : ControlFlowStmt(NULL), + BlockID(blockid) {} + ~CFGBlock() {}; + + // Statement iterators + typedef StatementListTy::iterator iterator; + typedef StatementListTy::const_iterator const_iterator; + typedef std::reverse_iterator const_reverse_iterator; + typedef std::reverse_iterator reverse_iterator; + + Stmt* front() { return Stmts.front(); } + Stmt* back() { return Stmts.back(); } + + iterator begin() { return Stmts.begin(); } + iterator end() { return Stmts.end(); } + const_iterator begin() const { return Stmts.begin(); } + const_iterator end() const { return Stmts.end(); } + + reverse_iterator rbegin() { return Stmts.rbegin(); } + reverse_iterator rend() { return Stmts.rend(); } + const_reverse_iterator rbegin() const { return Stmts.rbegin(); } + const_reverse_iterator rend() const { return Stmts.rend(); } + + unsigned size() const { return Stmts.size(); } + bool empty() const { return Stmts.empty(); } + + // CFG iterators + typedef AdjacentBlocks::iterator pred_iterator; + typedef AdjacentBlocks::const_iterator const_pred_iterator; + typedef AdjacentBlocks::reverse_iterator pred_reverse_iterator; + typedef AdjacentBlocks::const_reverse_iterator const_pred_reverse_iterator; + + typedef AdjacentBlocks::iterator succ_iterator; + typedef AdjacentBlocks::const_iterator const_succ_iterator; + typedef AdjacentBlocks::reverse_iterator succ_reverse_iterator; + typedef AdjacentBlocks::const_reverse_iterator const_succ_reverse_iterator; + + pred_iterator pred_begin() { return Preds.begin(); } + pred_iterator pred_end() { return Preds.end(); } + const_pred_iterator pred_begin() const { return Preds.begin(); } + const_pred_iterator pred_end() const { return Preds.end(); } + + pred_reverse_iterator pred_rbegin() { return Preds.rbegin(); } + pred_reverse_iterator pred_rend() { return Preds.rend(); } + const_pred_reverse_iterator pred_rbegin() const { return Preds.rbegin(); } + const_pred_reverse_iterator pred_rend() const { return Preds.rend(); } + + succ_iterator succ_begin() { return Succs.begin(); } + succ_iterator succ_end() { return Succs.end(); } + const_succ_iterator succ_begin() const { return Succs.begin(); } + const_succ_iterator succ_end() const { return Succs.end(); } + + succ_reverse_iterator succ_rbegin() { return Succs.rbegin(); } + succ_reverse_iterator succ_rend() { return Succs.rend(); } + const_succ_reverse_iterator succ_rbegin() const { return Succs.rbegin(); } + const_succ_reverse_iterator succ_rend() const { return Succs.rend(); } + + unsigned succ_size() const { return Succs.size(); } + bool succ_empty() const { return Succs.empty(); } + + unsigned pred_size() const { return Preds.size(); } + bool pred_empty() const { return Preds.empty(); } + + // Manipulation of block contents + + void appendStmt(Stmt* Statement) { Stmts.push_back(Statement); } + void setTerminator(Stmt* Statement) { ControlFlowStmt = Statement; } + void reverseStmts(); + + void addSuccessor(CFGBlock* Block) { + Block->Preds.push_back(this); + Succs.push_back(Block); + } + + unsigned getBlockID() const { return BlockID; } + + void dump(); + void print(std::ostream& OS); +}; + + +/// CFG - Represents a source-level, intra-procedural CFG that represents the +/// control-flow of a Stmt. The Stmt can represent an entire function body, +/// or a single expression. A CFG will always contain one empty block that +/// represents the Exit point of the CFG. A CFG will also contain a designated +/// Entry block. The CFG solely represents control-flow; it consists of +/// CFGBlocks which are simply containers of Stmt*'s in the AST the CFG +/// was constructed from. +class CFG { + typedef std::list CFGBlockListTy; + CFGBlockListTy Blocks; + +public: + + CFG() {}; + ~CFG() {}; + + CFGBlock* createBlock(unsigned blockID) { + Blocks.push_front(CFGBlock(blockID)); + return front(); + } + + // Block iterators + typedef CFGBlockListTy::iterator iterator; + typedef CFGBlockListTy::const_iterator const_iterator; + typedef std::reverse_iterator reverse_iterator; + typedef std::reverse_iterator const_reverse_iterator; + + CFGBlock* front() { return &Blocks.front(); } + CFGBlock* back() { return &Blocks.back(); } + + iterator begin() { return Blocks.begin(); } + iterator end() { return Blocks.end(); } + const_iterator begin() const { return Blocks.begin(); } + const_iterator end() const { return Blocks.end(); } + + reverse_iterator rbegin() { return Blocks.rbegin(); } + reverse_iterator rend() { return Blocks.rend(); } + const_reverse_iterator rbegin() const { return Blocks.rbegin(); } + const_reverse_iterator rend() const { return Blocks.rend(); } + + CFGBlock* getEntry() { return front(); } + CFGBlock* getExit() { return back(); } + + // Utility + + static CFG* BuildCFG(Stmt* AST); + void print(std::ostream& OS); + void dump(); + +}; + +} // end namespace clang \ No newline at end of file -- 2.50.1