From: DeLesley Hutchins Date: Wed, 10 Sep 2014 22:12:52 +0000 (+0000) Subject: Thread Safety Analysis: major update to thread safety TIL. X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=1c11b7cda0af21cf2c52fdaa4392977df85a1a9c;p=clang Thread Safety Analysis: major update to thread safety TIL. Numerous changes, including: * Changed the way variables and instructions are handled in basic blocks to be more efficient. * Eliminated SExprRef. * Simplified futures. * Fixed documentation. * Compute dominator and post dominator trees. git-svn-id: https://llvm.org/svn/llvm-project/cfe/trunk@217556 91177308-0d34-0410-b5e6-96231b3b80d8 --- diff --git a/include/clang/Analysis/Analyses/ThreadSafetyCommon.h b/include/clang/Analysis/Analyses/ThreadSafetyCommon.h index edd6c8c168..01492685c7 100644 --- a/include/clang/Analysis/Analyses/ThreadSafetyCommon.h +++ b/include/clang/Analysis/Analyses/ThreadSafetyCommon.h @@ -477,9 +477,9 @@ private: // Indexed by clang BlockID. LVarDefinitionMap CurrentLVarMap; - std::vector CurrentArguments; - std::vector CurrentInstructions; - std::vector IncompleteArgs; + std::vector CurrentArguments; + std::vector CurrentInstructions; + std::vector IncompleteArgs; til::BasicBlock *CurrentBB; BlockInfo *CurrentBlockInfo; }; diff --git a/include/clang/Analysis/Analyses/ThreadSafetyLogical.h b/include/clang/Analysis/Analyses/ThreadSafetyLogical.h index b08d7c4b54..bc78021343 100644 --- a/include/clang/Analysis/Analyses/ThreadSafetyLogical.h +++ b/include/clang/Analysis/Analyses/ThreadSafetyLogical.h @@ -41,13 +41,13 @@ private: }; class Terminal : public LExpr { - til::SExprRef Expr; + til::SExpr *Expr; public: Terminal(til::SExpr *Expr) : LExpr(LExpr::Terminal), Expr(Expr) {} - const til::SExpr *expr() const { return Expr.get(); } - til::SExpr *expr() { return Expr.get(); } + const til::SExpr *expr() const { return Expr; } + til::SExpr *expr() { return Expr; } static bool classof(const LExpr *E) { return E->kind() == LExpr::Terminal; } }; diff --git a/include/clang/Analysis/Analyses/ThreadSafetyOps.def b/include/clang/Analysis/Analyses/ThreadSafetyOps.def index 6ebc95dbe9..0d2458b0c8 100644 --- a/include/clang/Analysis/Analyses/ThreadSafetyOps.def +++ b/include/clang/Analysis/Analyses/ThreadSafetyOps.def @@ -44,8 +44,11 @@ TIL_OPCODE_DEF(Cast) TIL_OPCODE_DEF(SCFG) TIL_OPCODE_DEF(BasicBlock) TIL_OPCODE_DEF(Phi) + +// Terminator instructions TIL_OPCODE_DEF(Goto) TIL_OPCODE_DEF(Branch) +TIL_OPCODE_DEF(Return) // pseudo-terms TIL_OPCODE_DEF(Identifier) diff --git a/include/clang/Analysis/Analyses/ThreadSafetyTIL.h b/include/clang/Analysis/Analyses/ThreadSafetyTIL.h index 8e244f99a7..da0657990a 100644 --- a/include/clang/Analysis/Analyses/ThreadSafetyTIL.h +++ b/include/clang/Analysis/Analyses/ThreadSafetyTIL.h @@ -63,24 +63,27 @@ namespace threadSafety { namespace til { +/// Enum for the different distinct classes of SExpr enum TIL_Opcode { #define TIL_OPCODE_DEF(X) COP_##X, #include "ThreadSafetyOps.def" #undef TIL_OPCODE_DEF }; +/// Opcode for unary arithmetic operations. enum TIL_UnaryOpcode : unsigned char { UOP_Minus, // - UOP_BitNot, // ~ UOP_LogicNot // ! }; +/// Opcode for binary arithmetic operations. enum TIL_BinaryOpcode : unsigned char { + BOP_Add, // + + BOP_Sub, // - BOP_Mul, // * BOP_Div, // / BOP_Rem, // % - BOP_Add, // + - BOP_Sub, // - BOP_Shl, // << BOP_Shr, // >> BOP_BitAnd, // & @@ -90,10 +93,11 @@ enum TIL_BinaryOpcode : unsigned char { BOP_Neq, // != BOP_Lt, // < BOP_Leq, // <= - BOP_LogicAnd, // && - BOP_LogicOr // || + BOP_LogicAnd, // && (no short-circuit) + BOP_LogicOr // || (no short-circuit) }; +/// Opcode for cast operations. enum TIL_CastOpcode : unsigned char { CAST_none = 0, CAST_extendNum, // extend precision of numeric type @@ -107,21 +111,24 @@ const TIL_Opcode COP_Min = COP_Future; const TIL_Opcode COP_Max = COP_Branch; const TIL_UnaryOpcode UOP_Min = UOP_Minus; const TIL_UnaryOpcode UOP_Max = UOP_LogicNot; -const TIL_BinaryOpcode BOP_Min = BOP_Mul; +const TIL_BinaryOpcode BOP_Min = BOP_Add; const TIL_BinaryOpcode BOP_Max = BOP_LogicOr; const TIL_CastOpcode CAST_Min = CAST_none; const TIL_CastOpcode CAST_Max = CAST_toInt; +/// Return the name of a unary opcode. StringRef getUnaryOpcodeString(TIL_UnaryOpcode Op); + +/// Return the name of a binary opcode. StringRef getBinaryOpcodeString(TIL_BinaryOpcode Op); -// ValueTypes are data types that can actually be held in registers. -// All variables and expressions must have a vBNF_Nonealue type. -// Pointer types are further subdivided into the various heap-allocated -// types, such as functions, records, etc. -// Structured types that are passed by value (e.g. complex numbers) -// require special handling; they use BT_ValueRef, and size ST_0. +/// ValueTypes are data types that can actually be held in registers. +/// All variables and expressions must have a value type. +/// Pointer types are further subdivided into the various heap-allocated +/// types, such as functions, records, etc. +/// Structured types that are passed by value (e.g. complex numbers) +/// require special handling; they use BT_ValueRef, and size ST_0. struct ValueType { enum BaseType : unsigned char { BT_Void = 0, @@ -247,8 +254,10 @@ inline ValueType ValueType::getValueType() { } +class BasicBlock; + -// Base class for AST nodes in the typed intermediate language. +/// Base class for AST nodes in the typed intermediate language. class SExpr { public: TIL_Opcode opcode() const { return static_cast(Opcode); } @@ -267,71 +276,47 @@ public: // template typename C::CType compare(CType* E, C& Cmp) { // compare all subexpressions, following the comparator interface // } - void *operator new(size_t S, MemRegionRef &R) { return ::operator new(S, R); } - // SExpr objects cannot be deleted. + /// SExpr objects cannot be deleted. // This declaration is public to workaround a gcc bug that breaks building // with REQUIRES_EH=1. void operator delete(void *) LLVM_DELETED_FUNCTION; + /// Returns the instruction ID for this expression. + /// All basic block instructions have a unique ID (i.e. virtual register). + unsigned id() const { return SExprID; } + + /// Returns the block, if this is an instruction in a basic block, + /// otherwise returns null. + BasicBlock* block() const { return Block; } + + /// Set the basic block and instruction ID for this expression. + void setID(BasicBlock *B, unsigned id) { Block = B; SExprID = id; } + protected: - SExpr(TIL_Opcode Op) : Opcode(Op), Reserved(0), Flags(0) {} - SExpr(const SExpr &E) : Opcode(E.Opcode), Reserved(0), Flags(E.Flags) {} + SExpr(TIL_Opcode Op) + : Opcode(Op), Reserved(0), Flags(0), SExprID(0), Block(nullptr) {} + SExpr(const SExpr &E) + : Opcode(E.Opcode), Reserved(0), Flags(E.Flags), SExprID(0), + Block(nullptr) {} const unsigned char Opcode; unsigned char Reserved; unsigned short Flags; + unsigned SExprID; + BasicBlock* Block; private: SExpr() LLVM_DELETED_FUNCTION; - // SExpr objects must be created in an arena. + /// SExpr objects must be created in an arena. void *operator new(size_t) LLVM_DELETED_FUNCTION; }; -// Class for owning references to SExprs. -// Includes attach/detach logic for counting variable references and lazy -// rewriting strategies. -class SExprRef { -public: - SExprRef() : Ptr(nullptr) { } - SExprRef(std::nullptr_t P) : Ptr(nullptr) { } - SExprRef(SExprRef &&R) : Ptr(R.Ptr) { R.Ptr = nullptr; } - - // Defined after Variable and Future, below. - inline SExprRef(SExpr *P); - inline ~SExprRef(); - - SExpr *get() { return Ptr; } - const SExpr *get() const { return Ptr; } - - SExpr *operator->() { return get(); } - const SExpr *operator->() const { return get(); } - - SExpr &operator*() { return *Ptr; } - const SExpr &operator*() const { return *Ptr; } - - bool operator==(const SExprRef &R) const { return Ptr == R.Ptr; } - bool operator!=(const SExprRef &R) const { return !operator==(R); } - bool operator==(const SExpr *P) const { return Ptr == P; } - bool operator!=(const SExpr *P) const { return !operator==(P); } - bool operator==(std::nullptr_t) const { return Ptr == nullptr; } - bool operator!=(std::nullptr_t) const { return Ptr != nullptr; } - - inline void reset(SExpr *E); - -private: - inline void attach(); - inline void detach(); - - SExpr *Ptr; -}; - - // Contains various helper functions for SExprs. namespace ThreadSafetyTIL { inline bool isTrivial(const SExpr *E) { @@ -343,62 +328,64 @@ namespace ThreadSafetyTIL { // Nodes which declare variables class Function; class SFunction; -class BasicBlock; class Let; -// A named variable, e.g. "x". -// -// There are two distinct places in which a Variable can appear in the AST. -// A variable declaration introduces a new variable, and can occur in 3 places: -// Let-expressions: (Let (x = t) u) -// Functions: (Function (x : t) u) -// Self-applicable functions (SFunction (x) t) -// -// If a variable occurs in any other location, it is a reference to an existing -// variable declaration -- e.g. 'x' in (x * y + z). To save space, we don't -// allocate a separate AST node for variable references; a reference is just a -// pointer to the original declaration. +/// A named variable, e.g. "x". +/// +/// There are two distinct places in which a Variable can appear in the AST. +/// A variable declaration introduces a new variable, and can occur in 3 places: +/// Let-expressions: (Let (x = t) u) +/// Functions: (Function (x : t) u) +/// Self-applicable functions (SFunction (x) t) +/// +/// If a variable occurs in any other location, it is a reference to an existing +/// variable declaration -- e.g. 'x' in (x * y + z). To save space, we don't +/// allocate a separate AST node for variable references; a reference is just a +/// pointer to the original declaration. class Variable : public SExpr { public: static bool classof(const SExpr *E) { return E->opcode() == COP_Variable; } - // Let-variable, function parameter, or self-variable enum VariableKind { - VK_Let, - VK_LetBB, - VK_Fun, - VK_SFun + VK_Let, ///< Let-variable + VK_Fun, ///< Function parameter + VK_SFun ///< SFunction (self) parameter }; - // These are defined after SExprRef contructor, below - inline Variable(SExpr *D, const clang::ValueDecl *Cvd = nullptr); - inline Variable(StringRef s, SExpr *D = nullptr); - inline Variable(const Variable &Vd, SExpr *D); + Variable(StringRef s, SExpr *D = nullptr) + : SExpr(COP_Variable), Name(s), Definition(D), Cvdecl(nullptr) { + Flags = VK_Let; + } + Variable(SExpr *D, const clang::ValueDecl *Cvd = nullptr) + : SExpr(COP_Variable), Name(Cvd ? Cvd->getName() : "_x"), + Definition(D), Cvdecl(Cvd) { + Flags = VK_Let; + } + Variable(const Variable &Vd, SExpr *D) // rewrite constructor + : SExpr(Vd), Name(Vd.Name), Definition(D), Cvdecl(Vd.Cvdecl) { + Flags = Vd.kind(); + } + /// Return the kind of variable (let, function param, or self) VariableKind kind() const { return static_cast(Flags); } + /// Return the name of the variable, if any. StringRef name() const { return Name; } - const clang::ValueDecl *clangDecl() const { return Cvdecl; } - // Returns the definition (for let vars) or type (for parameter & self vars) - SExpr *definition() { return Definition.get(); } - const SExpr *definition() const { return Definition.get(); } - - void attachVar() const { ++NumUses; } - void detachVar() const { assert(NumUses > 0); --NumUses; } + /// Return the clang declaration for this variable, if any. + const clang::ValueDecl *clangDecl() const { return Cvdecl; } - unsigned getID() const { return Id; } - unsigned getBlockID() const { return BlockID; } + /// Return the definition of the variable. + /// For let-vars, this is the setting expression. + /// For function and self parameters, it is the type of the variable. + SExpr *definition() { return Definition; } + const SExpr *definition() const { return Definition; } - void setName(StringRef S) { Name = S; } - void setID(unsigned Bid, unsigned I) { - BlockID = static_cast(Bid); - Id = static_cast(I); - } - void setClangDecl(const clang::ValueDecl *VD) { Cvdecl = VD; } - void setDefinition(SExpr *E); + void setName(StringRef S) { Name = S; } void setKind(VariableKind K) { Flags = K; } + void setDefinition(SExpr *E) { Definition = E; } + void setClangDecl(const clang::ValueDecl *VD) { Cvdecl = VD; } template typename V::R_SExpr traverse(V &Vs, typename V::R_Ctx Ctx) { @@ -418,17 +405,13 @@ private: friend class Let; StringRef Name; // The name of the variable. - SExprRef Definition; // The TIL type or definition + SExpr* Definition; // The TIL type or definition const clang::ValueDecl *Cvdecl; // The clang declaration for this variable. - - unsigned short BlockID; - unsigned short Id; - mutable unsigned NumUses; }; -// Placeholder for an expression that has not yet been created. -// Used to implement lazy copy and rewriting strategies. +/// Placeholder for an expression that has not yet been created. +/// Used to implement lazy copy and rewriting strategies. class Future : public SExpr { public: static bool classof(const SExpr *E) { return E->opcode() == COP_Future; } @@ -439,22 +422,14 @@ public: FS_done }; - Future() : - SExpr(COP_Future), Status(FS_pending), Result(nullptr), Location(nullptr) - {} + Future() : SExpr(COP_Future), Status(FS_pending), Result(nullptr) {} + private: virtual ~Future() LLVM_DELETED_FUNCTION; -public: - - // Registers the location in the AST where this future is stored. - // Forcing the future will automatically update the AST. - static inline void registerLocation(SExprRef *Member) { - if (Future *F = dyn_cast_or_null(Member->get())) - F->Location = Member; - } +public: // A lazy rewriting strategy should subclass Future and override this method. - virtual SExpr *create() { return nullptr; } + virtual SExpr *compute() { return nullptr; } // Return the result of this future if it exists, otherwise return null. SExpr *maybeGetResult() const { @@ -465,8 +440,7 @@ public: SExpr *result() { switch (Status) { case FS_pending: - force(); - return Result; + return force(); case FS_evaluating: return nullptr; // infinite loop; illegal recursion. case FS_done: @@ -488,81 +462,14 @@ public: } private: - // Force the future. - inline void force(); + SExpr* force(); FutureStatus Status; SExpr *Result; - SExprRef *Location; }; -inline void SExprRef::attach() { - if (!Ptr) - return; - - TIL_Opcode Op = Ptr->opcode(); - if (Op == COP_Variable) { - cast(Ptr)->attachVar(); - } else if (Op == COP_Future) { - cast(Ptr)->registerLocation(this); - } -} - -inline void SExprRef::detach() { - if (Ptr && Ptr->opcode() == COP_Variable) { - cast(Ptr)->detachVar(); - } -} - -inline SExprRef::SExprRef(SExpr *P) : Ptr(P) { - attach(); -} - -inline SExprRef::~SExprRef() { - detach(); -} - -inline void SExprRef::reset(SExpr *P) { - detach(); - Ptr = P; - attach(); -} - - -inline Variable::Variable(StringRef s, SExpr *D) - : SExpr(COP_Variable), Name(s), Definition(D), Cvdecl(nullptr), - BlockID(0), Id(0), NumUses(0) { - Flags = VK_Let; -} - -inline Variable::Variable(SExpr *D, const clang::ValueDecl *Cvd) - : SExpr(COP_Variable), Name(Cvd ? Cvd->getName() : "_x"), - Definition(D), Cvdecl(Cvd), BlockID(0), Id(0), NumUses(0) { - Flags = VK_Let; -} - -inline Variable::Variable(const Variable &Vd, SExpr *D) // rewrite constructor - : SExpr(Vd), Name(Vd.Name), Definition(D), Cvdecl(Vd.Cvdecl), - BlockID(0), Id(0), NumUses(0) { - Flags = Vd.kind(); -} - -inline void Variable::setDefinition(SExpr *E) { - Definition.reset(E); -} - -void Future::force() { - Status = FS_evaluating; - SExpr *R = create(); - Result = R; - if (Location) - Location->reset(R); - Status = FS_done; -} - - -// Placeholder for C++ expressions that cannot be represented in the TIL. +/// Placeholder for expressions that cannot be represented in the TIL. class Undefined : public SExpr { public: static bool classof(const SExpr *E) { return E->opcode() == COP_Undefined; } @@ -585,7 +492,7 @@ private: }; -// Placeholder for a wildcard that matches any other expression. +/// Placeholder for a wildcard that matches any other expression. class Wildcard : public SExpr { public: static bool classof(const SExpr *E) { return E->opcode() == COP_Wildcard; } @@ -716,8 +623,8 @@ typename V::R_SExpr Literal::traverse(V &Vs, typename V::R_Ctx Ctx) { } -// Literal pointer to an object allocated in memory. -// At compile time, pointer literals are represented by symbolic names. +/// A Literal pointer to an object allocated in memory. +/// At compile time, pointer literals are represented by symbolic names. class LiteralPtr : public SExpr { public: static bool classof(const SExpr *E) { return E->opcode() == COP_LiteralPtr; } @@ -743,9 +650,9 @@ private: }; -// A function -- a.k.a. lambda abstraction. -// Functions with multiple arguments are created by currying, -// e.g. (function (x: Int) (function (y: Int) (add x y))) +/// A function -- a.k.a. lambda abstraction. +/// Functions with multiple arguments are created by currying, +/// e.g. (Function (x: Int) (Function (y: Int) (Code { return x + y }))) class Function : public SExpr { public: static bool classof(const SExpr *E) { return E->opcode() == COP_Function; } @@ -762,8 +669,8 @@ public: Variable *variableDecl() { return VarDecl; } const Variable *variableDecl() const { return VarDecl; } - SExpr *body() { return Body.get(); } - const SExpr *body() const { return Body.get(); } + SExpr *body() { return Body; } + const SExpr *body() const { return Body; } template typename V::R_SExpr traverse(V &Vs, typename V::R_Ctx Ctx) { @@ -790,13 +697,13 @@ public: private: Variable *VarDecl; - SExprRef Body; + SExpr* Body; }; -// A self-applicable function. -// A self-applicable function can be applied to itself. It's useful for -// implementing objects and late binding +/// A self-applicable function. +/// A self-applicable function can be applied to itself. It's useful for +/// implementing objects and late binding. class SFunction : public SExpr { public: static bool classof(const SExpr *E) { return E->opcode() == COP_SFunction; } @@ -805,20 +712,20 @@ public: : SExpr(COP_SFunction), VarDecl(Vd), Body(B) { assert(Vd->Definition == nullptr); Vd->setKind(Variable::VK_SFun); - Vd->Definition.reset(this); + Vd->Definition = this; } SFunction(const SFunction &F, Variable *Vd, SExpr *B) // rewrite constructor : SExpr(F), VarDecl(Vd), Body(B) { assert(Vd->Definition == nullptr); Vd->setKind(Variable::VK_SFun); - Vd->Definition.reset(this); + Vd->Definition = this; } Variable *variableDecl() { return VarDecl; } const Variable *variableDecl() const { return VarDecl; } - SExpr *body() { return Body.get(); } - const SExpr *body() const { return Body.get(); } + SExpr *body() { return Body; } + const SExpr *body() const { return Body; } template typename V::R_SExpr traverse(V &Vs, typename V::R_Ctx Ctx) { @@ -842,11 +749,11 @@ public: private: Variable *VarDecl; - SExprRef Body; + SExpr* Body; }; -// A block of code -- e.g. the body of a function. +/// A block of code -- e.g. the body of a function. class Code : public SExpr { public: static bool classof(const SExpr *E) { return E->opcode() == COP_Code; } @@ -855,11 +762,11 @@ public: Code(const Code &C, SExpr *T, SExpr *B) // rewrite constructor : SExpr(C), ReturnType(T), Body(B) {} - SExpr *returnType() { return ReturnType.get(); } - const SExpr *returnType() const { return ReturnType.get(); } + SExpr *returnType() { return ReturnType; } + const SExpr *returnType() const { return ReturnType; } - SExpr *body() { return Body.get(); } - const SExpr *body() const { return Body.get(); } + SExpr *body() { return Body; } + const SExpr *body() const { return Body; } template typename V::R_SExpr traverse(V &Vs, typename V::R_Ctx Ctx) { @@ -877,12 +784,12 @@ public: } private: - SExprRef ReturnType; - SExprRef Body; + SExpr* ReturnType; + SExpr* Body; }; -// A typed, writable location in memory +/// A typed, writable location in memory class Field : public SExpr { public: static bool classof(const SExpr *E) { return E->opcode() == COP_Field; } @@ -891,11 +798,11 @@ public: Field(const Field &C, SExpr *R, SExpr *B) // rewrite constructor : SExpr(C), Range(R), Body(B) {} - SExpr *range() { return Range.get(); } - const SExpr *range() const { return Range.get(); } + SExpr *range() { return Range; } + const SExpr *range() const { return Range; } - SExpr *body() { return Body.get(); } - const SExpr *body() const { return Body.get(); } + SExpr *body() { return Body; } + const SExpr *body() const { return Body; } template typename V::R_SExpr traverse(V &Vs, typename V::R_Ctx Ctx) { @@ -913,12 +820,16 @@ public: } private: - SExprRef Range; - SExprRef Body; + SExpr* Range; + SExpr* Body; }; -// Apply an argument to a function +/// Apply an argument to a function. +/// Note that this does not actually call the function. Functions are curried, +/// so this returns a closure in which the first parameter has been applied. +/// Once all parameters have been applied, Call can be used to invoke the +/// function. class Apply : public SExpr { public: static bool classof(const SExpr *E) { return E->opcode() == COP_Apply; } @@ -928,11 +839,11 @@ public: : SExpr(A), Fun(F), Arg(Ar) {} - SExpr *fun() { return Fun.get(); } - const SExpr *fun() const { return Fun.get(); } + SExpr *fun() { return Fun; } + const SExpr *fun() const { return Fun; } - SExpr *arg() { return Arg.get(); } - const SExpr *arg() const { return Arg.get(); } + SExpr *arg() { return Arg; } + const SExpr *arg() const { return Arg; } template typename V::R_SExpr traverse(V &Vs, typename V::R_Ctx Ctx) { @@ -950,12 +861,12 @@ public: } private: - SExprRef Fun; - SExprRef Arg; + SExpr* Fun; + SExpr* Arg; }; -// Apply a self-argument to a self-applicable function +/// Apply a self-argument to a self-applicable function. class SApply : public SExpr { public: static bool classof(const SExpr *E) { return E->opcode() == COP_SApply; } @@ -964,18 +875,18 @@ public: SApply(SApply &A, SExpr *Sf, SExpr *Ar = nullptr) // rewrite constructor : SExpr(A), Sfun(Sf), Arg(Ar) {} - SExpr *sfun() { return Sfun.get(); } - const SExpr *sfun() const { return Sfun.get(); } + SExpr *sfun() { return Sfun; } + const SExpr *sfun() const { return Sfun; } - SExpr *arg() { return Arg.get() ? Arg.get() : Sfun.get(); } - const SExpr *arg() const { return Arg.get() ? Arg.get() : Sfun.get(); } + SExpr *arg() { return Arg ? Arg : Sfun; } + const SExpr *arg() const { return Arg ? Arg : Sfun; } bool isDelegation() const { return Arg != nullptr; } template typename V::R_SExpr traverse(V &Vs, typename V::R_Ctx Ctx) { auto Nf = Vs.traverse(Sfun, Vs.subExprCtx(Ctx)); - typename V::R_SExpr Na = Arg.get() ? Vs.traverse(Arg, Vs.subExprCtx(Ctx)) + typename V::R_SExpr Na = Arg ? Vs.traverse(Arg, Vs.subExprCtx(Ctx)) : nullptr; return Vs.reduceSApply(*this, Nf, Na); } @@ -989,12 +900,12 @@ public: } private: - SExprRef Sfun; - SExprRef Arg; + SExpr* Sfun; + SExpr* Arg; }; -// Project a named slot from a C++ struct or class. +/// Project a named slot from a C++ struct or class. class Project : public SExpr { public: static bool classof(const SExpr *E) { return E->opcode() == COP_Project; } @@ -1009,8 +920,8 @@ public: : SExpr(P), Rec(R), SlotName(P.SlotName), Cvdecl(P.Cvdecl) { } - SExpr *record() { return Rec.get(); } - const SExpr *record() const { return Rec.get(); } + SExpr *record() { return Rec; } + const SExpr *record() const { return Rec; } const clang::ValueDecl *clangDecl() const { return Cvdecl; } @@ -1042,13 +953,13 @@ public: } private: - SExprRef Rec; + SExpr* Rec; StringRef SlotName; const clang::ValueDecl *Cvdecl; }; -// Call a function (after all arguments have been applied). +/// Call a function (after all arguments have been applied). class Call : public SExpr { public: static bool classof(const SExpr *E) { return E->opcode() == COP_Call; } @@ -1057,8 +968,8 @@ public: : SExpr(COP_Call), Target(T), Cexpr(Ce) {} Call(const Call &C, SExpr *T) : SExpr(C), Target(T), Cexpr(C.Cexpr) {} - SExpr *target() { return Target.get(); } - const SExpr *target() const { return Target.get(); } + SExpr *target() { return Target; } + const SExpr *target() const { return Target; } const clang::CallExpr *clangCallExpr() const { return Cexpr; } @@ -1074,12 +985,12 @@ public: } private: - SExprRef Target; + SExpr* Target; const clang::CallExpr *Cexpr; }; -// Allocate memory for a new value on the heap or stack. +/// Allocate memory for a new value on the heap or stack. class Alloc : public SExpr { public: static bool classof(const SExpr *E) { return E->opcode() == COP_Call; } @@ -1094,8 +1005,8 @@ public: AllocKind kind() const { return static_cast(Flags); } - SExpr *dataType() { return Dtype.get(); } - const SExpr *dataType() const { return Dtype.get(); } + SExpr *dataType() { return Dtype; } + const SExpr *dataType() const { return Dtype; } template typename V::R_SExpr traverse(V &Vs, typename V::R_Ctx Ctx) { @@ -1112,11 +1023,11 @@ public: } private: - SExprRef Dtype; + SExpr* Dtype; }; -// Load a value from memory. +/// Load a value from memory. class Load : public SExpr { public: static bool classof(const SExpr *E) { return E->opcode() == COP_Load; } @@ -1124,8 +1035,8 @@ public: Load(SExpr *P) : SExpr(COP_Load), Ptr(P) {} Load(const Load &L, SExpr *P) : SExpr(L), Ptr(P) {} - SExpr *pointer() { return Ptr.get(); } - const SExpr *pointer() const { return Ptr.get(); } + SExpr *pointer() { return Ptr; } + const SExpr *pointer() const { return Ptr; } template typename V::R_SExpr traverse(V &Vs, typename V::R_Ctx Ctx) { @@ -1139,12 +1050,12 @@ public: } private: - SExprRef Ptr; + SExpr* Ptr; }; -// Store a value to memory. -// Source is a pointer, destination is the value to store. +/// Store a value to memory. +/// The destination is a pointer to a field, the source is the value to store. class Store : public SExpr { public: static bool classof(const SExpr *E) { return E->opcode() == COP_Store; } @@ -1152,11 +1063,11 @@ public: Store(SExpr *P, SExpr *V) : SExpr(COP_Store), Dest(P), Source(V) {} Store(const Store &S, SExpr *P, SExpr *V) : SExpr(S), Dest(P), Source(V) {} - SExpr *destination() { return Dest.get(); } // Address to store to - const SExpr *destination() const { return Dest.get(); } + SExpr *destination() { return Dest; } // Address to store to + const SExpr *destination() const { return Dest; } - SExpr *source() { return Source.get(); } // Value to store - const SExpr *source() const { return Source.get(); } + SExpr *source() { return Source; } // Value to store + const SExpr *source() const { return Source; } template typename V::R_SExpr traverse(V &Vs, typename V::R_Ctx Ctx) { @@ -1174,13 +1085,13 @@ public: } private: - SExprRef Dest; - SExprRef Source; + SExpr* Dest; + SExpr* Source; }; -// If p is a reference to an array, then first(p) is a reference to the first -// element. The usual array notation p[i] becomes first(p + i). +/// If p is a reference to an array, then p[i] is a reference to the i'th +/// element of the array. class ArrayIndex : public SExpr { public: static bool classof(const SExpr *E) { return E->opcode() == COP_ArrayIndex; } @@ -1189,11 +1100,11 @@ public: ArrayIndex(const ArrayIndex &E, SExpr *A, SExpr *N) : SExpr(E), Array(A), Index(N) {} - SExpr *array() { return Array.get(); } - const SExpr *array() const { return Array.get(); } + SExpr *array() { return Array; } + const SExpr *array() const { return Array; } - SExpr *index() { return Index.get(); } - const SExpr *index() const { return Index.get(); } + SExpr *index() { return Index; } + const SExpr *index() const { return Index; } template typename V::R_SExpr traverse(V &Vs, typename V::R_Ctx Ctx) { @@ -1211,14 +1122,14 @@ public: } private: - SExprRef Array; - SExprRef Index; + SExpr* Array; + SExpr* Index; }; -// Pointer arithmetic, restricted to arrays only. -// If p is a reference to an array, then p + n, where n is an integer, is -// a reference to a subarray. +/// Pointer arithmetic, restricted to arrays only. +/// If p is a reference to an array, then p + n, where n is an integer, is +/// a reference to a subarray. class ArrayAdd : public SExpr { public: static bool classof(const SExpr *E) { return E->opcode() == COP_ArrayAdd; } @@ -1227,11 +1138,11 @@ public: ArrayAdd(const ArrayAdd &E, SExpr *A, SExpr *N) : SExpr(E), Array(A), Index(N) {} - SExpr *array() { return Array.get(); } - const SExpr *array() const { return Array.get(); } + SExpr *array() { return Array; } + const SExpr *array() const { return Array; } - SExpr *index() { return Index.get(); } - const SExpr *index() const { return Index.get(); } + SExpr *index() { return Index; } + const SExpr *index() const { return Index; } template typename V::R_SExpr traverse(V &Vs, typename V::R_Ctx Ctx) { @@ -1249,12 +1160,13 @@ public: } private: - SExprRef Array; - SExprRef Index; + SExpr* Array; + SExpr* Index; }; -// Simple unary operation -- e.g. !, ~, etc. +/// Simple arithmetic unary operations, e.g. negate and not. +/// These operations have no side-effects. class UnaryOp : public SExpr { public: static bool classof(const SExpr *E) { return E->opcode() == COP_UnaryOp; } @@ -1268,8 +1180,8 @@ public: return static_cast(Flags); } - SExpr *expr() { return Expr0.get(); } - const SExpr *expr() const { return Expr0.get(); } + SExpr *expr() { return Expr0; } + const SExpr *expr() const { return Expr0; } template typename V::R_SExpr traverse(V &Vs, typename V::R_Ctx Ctx) { @@ -1287,11 +1199,12 @@ public: } private: - SExprRef Expr0; + SExpr* Expr0; }; -// Simple binary operation -- e.g. +, -, etc. +/// Simple arithmetic binary operations, e.g. +, -, etc. +/// These operations have no side effects. class BinaryOp : public SExpr { public: static bool classof(const SExpr *E) { return E->opcode() == COP_BinaryOp; } @@ -1309,11 +1222,11 @@ public: return static_cast(Flags); } - SExpr *expr0() { return Expr0.get(); } - const SExpr *expr0() const { return Expr0.get(); } + SExpr *expr0() { return Expr0; } + const SExpr *expr0() const { return Expr0; } - SExpr *expr1() { return Expr1.get(); } - const SExpr *expr1() const { return Expr1.get(); } + SExpr *expr1() { return Expr1; } + const SExpr *expr1() const { return Expr1; } template typename V::R_SExpr traverse(V &Vs, typename V::R_Ctx Ctx) { @@ -1335,12 +1248,14 @@ public: } private: - SExprRef Expr0; - SExprRef Expr1; + SExpr* Expr0; + SExpr* Expr1; }; -// Cast expression +/// Cast expressions. +/// Cast expressions are essentially unary operations, but we treat them +/// as a distinct AST node because they only change the type of the result. class Cast : public SExpr { public: static bool classof(const SExpr *E) { return E->opcode() == COP_Cast; } @@ -1352,8 +1267,8 @@ public: return static_cast(Flags); } - SExpr *expr() { return Expr0.get(); } - const SExpr *expr() const { return Expr0.get(); } + SExpr *expr() { return Expr0; } + const SExpr *expr() const { return Expr0; } template typename V::R_SExpr traverse(V &Vs, typename V::R_Ctx Ctx) { @@ -1371,16 +1286,18 @@ public: } private: - SExprRef Expr0; + SExpr* Expr0; }; class SCFG; +/// Phi Node, for code in SSA form. +/// Each Phi node has an array of possible values that it can take, +/// depending on where control flow comes from. class Phi : public SExpr { public: - // TODO: change to SExprRef typedef SimpleArray ValArray; // In minimal SSA form, all Phi nodes are MultiVal. @@ -1394,9 +1311,12 @@ public: static bool classof(const SExpr *E) { return E->opcode() == COP_Phi; } - Phi() : SExpr(COP_Phi) {} - Phi(MemRegionRef A, unsigned Nvals) : SExpr(COP_Phi), Values(A, Nvals) {} - Phi(const Phi &P, ValArray &&Vs) : SExpr(P), Values(std::move(Vs)) {} + Phi() + : SExpr(COP_Phi), Cvdecl(nullptr) {} + Phi(MemRegionRef A, unsigned Nvals) + : SExpr(COP_Phi), Values(A, Nvals), Cvdecl(nullptr) {} + Phi(const Phi &P, ValArray &&Vs) + : SExpr(P), Values(std::move(Vs)), Cvdecl(nullptr) {} const ValArray &values() const { return Values; } ValArray &values() { return Values; } @@ -1404,6 +1324,12 @@ public: Status status() const { return static_cast(Flags); } void setStatus(Status s) { Flags = s; } + /// Return the clang declaration of the variable for this Phi node, if any. + const clang::ValueDecl *clangDecl() const { return Cvdecl; } + + /// Set the clang variable associated with this Phi node. + void setClangDecl(const clang::ValueDecl *Cvd) { Cvdecl = Cvd; } + template typename V::R_SExpr traverse(V &Vs, typename V::R_Ctx Ctx) { typename V::template Container @@ -1423,65 +1349,260 @@ public: private: ValArray Values; + const clang::ValueDecl* Cvdecl; +}; + + +/// Base class for basic block terminators: Branch, Goto, and Return. +class Terminator : public SExpr { +public: + static bool classof(const SExpr *E) { + return E->opcode() >= COP_Goto && E->opcode() <= COP_Return; + } + +protected: + Terminator(TIL_Opcode Op) : SExpr(Op) {} + Terminator(const SExpr &E) : SExpr(E) {} + +public: + /// Return the list of basic blocks that this terminator can branch to. + ArrayRef successors(); + + ArrayRef successors() const { + return const_cast(this)->successors(); + } +}; + + +/// Jump to another basic block. +/// A goto instruction is essentially a tail-recursive call into another +/// block. In addition to the block pointer, it specifies an index into the +/// phi nodes of that block. The index can be used to retrieve the "arguments" +/// of the call. +class Goto : public Terminator { +public: + static bool classof(const SExpr *E) { return E->opcode() == COP_Goto; } + + Goto(BasicBlock *B, unsigned I) + : Terminator(COP_Goto), TargetBlock(B), Index(I) {} + Goto(const Goto &G, BasicBlock *B, unsigned I) + : Terminator(COP_Goto), TargetBlock(B), Index(I) {} + + const BasicBlock *targetBlock() const { return TargetBlock; } + BasicBlock *targetBlock() { return TargetBlock; } + + /// Returns the index into the + unsigned index() const { return Index; } + + /// Return the list of basic blocks that this terminator can branch to. + ArrayRef successors() { + return ArrayRef(&TargetBlock, 1); + } + + template + typename V::R_SExpr traverse(V &Vs, typename V::R_Ctx Ctx) { + BasicBlock *Ntb = Vs.reduceBasicBlockRef(TargetBlock); + return Vs.reduceGoto(*this, Ntb); + } + + template + typename C::CType compare(const Goto *E, C &Cmp) const { + // TODO: implement CFG comparisons + return Cmp.comparePointers(this, E); + } + +private: + BasicBlock *TargetBlock; + unsigned Index; +}; + + +/// A conditional branch to two other blocks. +/// Note that unlike Goto, Branch does not have an index. The target blocks +/// must be child-blocks, and cannot have Phi nodes. +class Branch : public Terminator { +public: + static bool classof(const SExpr *E) { return E->opcode() == COP_Branch; } + + Branch(SExpr *C, BasicBlock *T, BasicBlock *E) + : Terminator(COP_Branch), Condition(C) { + Branches[0] = T; + Branches[1] = E; + } + Branch(const Branch &Br, SExpr *C, BasicBlock *T, BasicBlock *E) + : Terminator(Br), Condition(C) { + Branches[0] = T; + Branches[1] = E; + } + + const SExpr *condition() const { return Condition; } + SExpr *condition() { return Condition; } + + const BasicBlock *thenBlock() const { return Branches[0]; } + BasicBlock *thenBlock() { return Branches[0]; } + + const BasicBlock *elseBlock() const { return Branches[1]; } + BasicBlock *elseBlock() { return Branches[1]; } + + /// Return the list of basic blocks that this terminator can branch to. + ArrayRef successors() { + return ArrayRef(Branches, 2); + } + + template + typename V::R_SExpr traverse(V &Vs, typename V::R_Ctx Ctx) { + auto Nc = Vs.traverse(Condition, Vs.subExprCtx(Ctx)); + BasicBlock *Ntb = Vs.reduceBasicBlockRef(Branches[0]); + BasicBlock *Nte = Vs.reduceBasicBlockRef(Branches[1]); + return Vs.reduceBranch(*this, Nc, Ntb, Nte); + } + + template + typename C::CType compare(const Branch *E, C &Cmp) const { + // TODO: implement CFG comparisons + return Cmp.comparePointers(this, E); + } + +private: + SExpr* Condition; + BasicBlock *Branches[2]; +}; + + +/// Return from the enclosing function, passing the return value to the caller. +/// Only the exit block should end with a return statement. +class Return : public Terminator { +public: + static bool classof(const SExpr *E) { return E->opcode() == COP_Return; } + + Return(SExpr* Rval) : Terminator(COP_Return), Retval(Rval) {} + Return(const Return &R, SExpr* Rval) : Terminator(R), Retval(Rval) {} + + /// Return an empty list. + ArrayRef successors() { + return ArrayRef(); + } + + SExpr *returnValue() { return Retval; } + const SExpr *returnValue() const { return Retval; } + + template + typename V::R_SExpr traverse(V &Vs, typename V::R_Ctx Ctx) { + auto Ne = Vs.traverse(Retval, Vs.subExprCtx(Ctx)); + return Vs.reduceReturn(*this, Ne); + } + + template + typename C::CType compare(const Return *E, C &Cmp) const { + return Cmp.compare(Retval, E->Retval); + } + +private: + SExpr* Retval; }; -// A basic block is part of an SCFG, and can be treated as a function in -// continuation passing style. It consists of a sequence of phi nodes, which -// are "arguments" to the function, followed by a sequence of instructions. -// Both arguments and instructions define new variables. It ends with a -// branch or goto to another basic block in the same SCFG. +inline ArrayRef Terminator::successors() { + switch (opcode()) { + case COP_Goto: return cast(this)->successors(); + case COP_Branch: return cast(this)->successors(); + case COP_Return: return cast(this)->successors(); + default: + return ArrayRef(); + } +} + + +/// A basic block is part of an SCFG. It can be treated as a function in +/// continuation passing style. A block consists of a sequence of phi nodes, +/// which are "arguments" to the function, followed by a sequence of +/// instructions. It ends with a Terminator, which is a Branch or Goto to +/// another basic block in the same SCFG. class BasicBlock : public SExpr { public: - typedef SimpleArray VarArray; + typedef SimpleArray InstrArray; typedef SimpleArray BlockArray; + // TopologyNodes are used to overlay tree structures on top of the CFG, + // such as dominator and postdominator trees. Each block is assigned an + // ID in the tree according to a depth-first search. Tree traversals are + // always up, towards the parents. + struct TopologyNode { + TopologyNode() : NodeID(0), SizeOfSubTree(0), Parent(nullptr) {} + + bool isParentOf(const TopologyNode& OtherNode) { + return OtherNode.NodeID > NodeID && + OtherNode.NodeID < NodeID + SizeOfSubTree; + } + + bool isParentOfOrEqual(const TopologyNode& OtherNode) { + return OtherNode.NodeID >= NodeID && + OtherNode.NodeID < NodeID + SizeOfSubTree; + } + + int NodeID; + int SizeOfSubTree; // Includes this node, so must be > 1. + BasicBlock *Parent; // Pointer to parent. + }; + static bool classof(const SExpr *E) { return E->opcode() == COP_BasicBlock; } - explicit BasicBlock(MemRegionRef A, BasicBlock* P = nullptr) + explicit BasicBlock(MemRegionRef A) : SExpr(COP_BasicBlock), Arena(A), CFGPtr(nullptr), BlockID(0), - Parent(P), Terminator(nullptr) - { } - BasicBlock(BasicBlock &B, VarArray &&As, VarArray &&Is, SExpr *T) - : SExpr(COP_BasicBlock), Arena(B.Arena), CFGPtr(nullptr), BlockID(0), - Parent(nullptr), Args(std::move(As)), Instrs(std::move(Is)), - Terminator(T) - { } + Visited(0), TermInstr(nullptr) {} + BasicBlock(BasicBlock &B, MemRegionRef A, InstrArray &&As, InstrArray &&Is, + Terminator *T) + : SExpr(COP_BasicBlock), Arena(A), CFGPtr(nullptr), BlockID(0),Visited(0), + Args(std::move(As)), Instrs(std::move(Is)), TermInstr(T) {} + + /// Returns the block ID. Every block has a unique ID in the CFG. + int blockID() const { return BlockID; } - unsigned blockID() const { return BlockID; } - unsigned numPredecessors() const { return Predecessors.size(); } + /// Returns the number of predecessors. + size_t numPredecessors() const { return Predecessors.size(); } + size_t numSuccessors() const { return successors().size(); } const SCFG* cfg() const { return CFGPtr; } SCFG* cfg() { return CFGPtr; } - const BasicBlock *parent() const { return Parent; } - BasicBlock *parent() { return Parent; } + const BasicBlock *parent() const { return DominatorNode.Parent; } + BasicBlock *parent() { return DominatorNode.Parent; } - const VarArray &arguments() const { return Args; } - VarArray &arguments() { return Args; } + const InstrArray &arguments() const { return Args; } + InstrArray &arguments() { return Args; } - const VarArray &instructions() const { return Instrs; } - VarArray &instructions() { return Instrs; } + InstrArray &instructions() { return Instrs; } + const InstrArray &instructions() const { return Instrs; } - const BlockArray &predecessors() const { return Predecessors; } + /// Returns a list of predecessors. + /// The order of predecessors in the list is important; each phi node has + /// exactly one argument for each precessor, in the same order. BlockArray &predecessors() { return Predecessors; } + const BlockArray &predecessors() const { return Predecessors; } - const SExpr *terminator() const { return Terminator.get(); } - SExpr *terminator() { return Terminator.get(); } + ArrayRef successors() { return TermInstr->successors(); } + ArrayRef successors() const { return TermInstr->successors(); } - void setBlockID(unsigned i) { BlockID = i; } - void setParent(BasicBlock *P) { Parent = P; } - void setTerminator(SExpr *E) { Terminator.reset(E); } + const Terminator *terminator() const { return TermInstr; } + Terminator *terminator() { return TermInstr; } - // Add a new argument. V must define a phi-node. - void addArgument(Variable *V) { - V->setKind(Variable::VK_LetBB); + void setTerminator(Terminator *E) { TermInstr = E; } + + bool Dominates(const BasicBlock &Other) { + return DominatorNode.isParentOfOrEqual(Other.DominatorNode); + } + + bool PostDominates(const BasicBlock &Other) { + return PostDominatorNode.isParentOfOrEqual(Other.PostDominatorNode); + } + + /// Add a new argument. + void addArgument(Phi *V) { Args.reserveCheck(1, Arena); Args.push_back(V); } - // Add a new instruction. - void addInstruction(Variable *V) { - V->setKind(Variable::VK_LetBB); + /// Add a new instruction. + void addInstruction(SExpr *V) { Instrs.reserveCheck(1, Arena); Instrs.push_back(V); } @@ -1498,34 +1619,29 @@ public: // Reserve space for NumPreds predecessors, including space in phi nodes. void reservePredecessors(unsigned NumPreds); - // Return the index of BB, or Predecessors.size if BB is not a predecessor. + /// Return the index of BB, or Predecessors.size if BB is not a predecessor. unsigned findPredecessorIndex(const BasicBlock *BB) const { auto I = std::find(Predecessors.cbegin(), Predecessors.cend(), BB); return std::distance(Predecessors.cbegin(), I); } - // Set id numbers for variables. - void renumberVars(); - template typename V::R_BasicBlock traverse(V &Vs, typename V::R_Ctx Ctx) { - typename V::template Container Nas(Vs, Args.size()); - typename V::template Container Nis(Vs, Instrs.size()); + typename V::template Container Nas(Vs, Args.size()); + typename V::template Container Nis(Vs, Instrs.size()); // Entering the basic block should do any scope initialization. Vs.enterBasicBlock(*this); - for (auto *A : Args) { - auto Ne = Vs.traverse(A->Definition, Vs.subExprCtx(Ctx)); - Variable *Nvd = Vs.enterScope(*A, Ne); - Nas.push_back(Nvd); + for (auto *E : Args) { + auto Ne = Vs.traverse(E, Vs.subExprCtx(Ctx)); + Nas.push_back(Ne); } - for (auto *I : Instrs) { - auto Ne = Vs.traverse(I->Definition, Vs.subExprCtx(Ctx)); - Variable *Nvd = Vs.enterScope(*I, Ne); - Nis.push_back(Nvd); + for (auto *E : Instrs) { + auto Ne = Vs.traverse(E, Vs.subExprCtx(Ctx)); + Nis.push_back(Ne); } - auto Nt = Vs.traverse(Terminator, Ctx); + auto Nt = Vs.traverse(TermInstr, Ctx); // Exiting the basic block should handle any scope cleanup. Vs.exitBasicBlock(*this); @@ -1542,22 +1658,32 @@ public: private: friend class SCFG; - MemRegionRef Arena; + int renumberInstrs(int id); // assign unique ids to all instructions + int topologicalSort(SimpleArray& Blocks, int ID); + int topologicalFinalSort(SimpleArray& Blocks, int ID); + void computeDominator(); + void computePostDominator(); - SCFG *CFGPtr; // The CFG that contains this block. - unsigned BlockID; // unique id for this BB in the containing CFG - BasicBlock *Parent; // The parent block is the enclosing lexical scope. - // The parent dominates this block. - BlockArray Predecessors; // Predecessor blocks in the CFG. - VarArray Args; // Phi nodes. One argument per predecessor. - VarArray Instrs; // Instructions. - SExprRef Terminator; // Branch or Goto +private: + MemRegionRef Arena; // The arena used to allocate this block. + SCFG *CFGPtr; // The CFG that contains this block. + int BlockID : 31; // unique id for this BB in the containing CFG. + // IDs are in topological order. + int Visited : 1; // Bit to determine if a block has been visited + // during a traversal. + BlockArray Predecessors; // Predecessor blocks in the CFG. + InstrArray Args; // Phi nodes. One argument per predecessor. + InstrArray Instrs; // Instructions. + Terminator* TermInstr; // Terminating instruction + + TopologyNode DominatorNode; // The dominator tree + TopologyNode PostDominatorNode; // The post-dominator tree }; -// An SCFG is a control-flow graph. It consists of a set of basic blocks, each -// of which terminates in a branch to another basic block. There is one -// entry point, and one exit point. +/// An SCFG is a control-flow graph. It consists of a set of basic blocks, +/// each of which terminates in a branch to another basic block. There is one +/// entry point, and one exit point. class SCFG : public SExpr { public: typedef SimpleArray BlockArray; @@ -1568,20 +1694,29 @@ public: SCFG(MemRegionRef A, unsigned Nblocks) : SExpr(COP_SCFG), Arena(A), Blocks(A, Nblocks), - Entry(nullptr), Exit(nullptr) { - Entry = new (A) BasicBlock(A, nullptr); - Exit = new (A) BasicBlock(A, Entry); - auto *V = new (A) Variable(new (A) Phi()); + Entry(nullptr), Exit(nullptr), NumInstructions(0), Normal(false) { + Entry = new (A) BasicBlock(A); + Exit = new (A) BasicBlock(A); + auto *V = new (A) Phi(); Exit->addArgument(V); + Exit->setTerminator(new (A) Return(V)); add(Entry); add(Exit); } SCFG(const SCFG &Cfg, BlockArray &&Ba) // steals memory from Ba : SExpr(COP_SCFG), Arena(Cfg.Arena), Blocks(std::move(Ba)), - Entry(nullptr), Exit(nullptr) { + Entry(nullptr), Exit(nullptr), NumInstructions(0), Normal(false) { // TODO: set entry and exit! } + /// Return true if this CFG is valid. + bool valid() const { return Entry && Exit && Blocks.size() > 0; } + + /// Return true if this CFG has been normalized. + /// After normalization, blocks are in topological order, and block and + /// instruction IDs have been assigned. + bool normal() const { return Normal; } + iterator begin() { return Blocks.begin(); } iterator end() { return Blocks.end(); } @@ -1596,9 +1731,17 @@ public: const BasicBlock *exit() const { return Exit; } BasicBlock *exit() { return Exit; } + /// Return the number of blocks in the CFG. + /// Block::blockID() will return a number less than numBlocks(); + size_t numBlocks() const { return Blocks.size(); } + + /// Return the total number of instructions in the CFG. + /// This is useful for building instruction side-tables; + /// A call to SExpr::id() will return a number less than numInstructions(). + unsigned numInstructions() { return NumInstructions; } + inline void add(BasicBlock *BB) { - assert(BB->CFGPtr == nullptr || BB->CFGPtr == this); - BB->setBlockID(Blocks.size()); + assert(BB->CFGPtr == nullptr); BB->CFGPtr = this; Blocks.reserveCheck(1, Arena); Blocks.push_back(BB); @@ -1607,13 +1750,13 @@ public: void setEntry(BasicBlock *BB) { Entry = BB; } void setExit(BasicBlock *BB) { Exit = BB; } - // Set varable ids in all blocks. - void renumberVars(); + void computeNormalForm(); template typename V::R_SExpr traverse(V &Vs, typename V::R_Ctx Ctx) { Vs.enterCFG(*this); typename V::template Container Bbs(Vs, Blocks.size()); + for (auto *B : Blocks) { Bbs.push_back( B->traverse(Vs, Vs.subExprCtx(Ctx)) ); } @@ -1623,101 +1766,26 @@ public: template typename C::CType compare(const SCFG *E, C &Cmp) const { - // TODO -- implement CFG comparisons + // TODO: implement CFG comparisons return Cmp.comparePointers(this, E); } +private: + void renumberInstrs(); // assign unique ids to all instructions + private: MemRegionRef Arena; BlockArray Blocks; BasicBlock *Entry; BasicBlock *Exit; + unsigned NumInstructions; + bool Normal; }; -class Goto : public SExpr { -public: - static bool classof(const SExpr *E) { return E->opcode() == COP_Goto; } - - Goto(BasicBlock *B, unsigned I) - : SExpr(COP_Goto), TargetBlock(B), Index(I) {} - Goto(const Goto &G, BasicBlock *B, unsigned I) - : SExpr(COP_Goto), TargetBlock(B), Index(I) {} - - const BasicBlock *targetBlock() const { return TargetBlock; } - BasicBlock *targetBlock() { return TargetBlock; } - unsigned index() const { return Index; } - - template - typename V::R_SExpr traverse(V &Vs, typename V::R_Ctx Ctx) { - BasicBlock *Ntb = Vs.reduceBasicBlockRef(TargetBlock); - return Vs.reduceGoto(*this, Ntb); - } - - template - typename C::CType compare(const Goto *E, C &Cmp) const { - // TODO -- implement CFG comparisons - return Cmp.comparePointers(this, E); - } - -private: - BasicBlock *TargetBlock; - unsigned Index; // Index into Phi nodes of target block. -}; - - -class Branch : public SExpr { -public: - static bool classof(const SExpr *E) { return E->opcode() == COP_Branch; } - - Branch(SExpr *C, BasicBlock *T, BasicBlock *E, unsigned TI, unsigned EI) - : SExpr(COP_Branch), Condition(C), ThenBlock(T), ElseBlock(E), - ThenIndex(TI), ElseIndex(EI) - {} - Branch(const Branch &Br, SExpr *C, BasicBlock *T, BasicBlock *E, - unsigned TI, unsigned EI) - : SExpr(COP_Branch), Condition(C), ThenBlock(T), ElseBlock(E), - ThenIndex(TI), ElseIndex(EI) - {} - - const SExpr *condition() const { return Condition; } - SExpr *condition() { return Condition; } - - const BasicBlock *thenBlock() const { return ThenBlock; } - BasicBlock *thenBlock() { return ThenBlock; } - - const BasicBlock *elseBlock() const { return ElseBlock; } - BasicBlock *elseBlock() { return ElseBlock; } - - unsigned thenIndex() const { return ThenIndex; } - unsigned elseIndex() const { return ElseIndex; } - - template - typename V::R_SExpr traverse(V &Vs, typename V::R_Ctx Ctx) { - auto Nc = Vs.traverse(Condition, Vs.subExprCtx(Ctx)); - BasicBlock *Ntb = Vs.reduceBasicBlockRef(ThenBlock); - BasicBlock *Nte = Vs.reduceBasicBlockRef(ElseBlock); - return Vs.reduceBranch(*this, Nc, Ntb, Nte); - } - - template - typename C::CType compare(const Branch *E, C &Cmp) const { - // TODO -- implement CFG comparisons - return Cmp.comparePointers(this, E); - } - -private: - SExpr *Condition; - BasicBlock *ThenBlock; - BasicBlock *ElseBlock; - unsigned ThenIndex; - unsigned ElseIndex; -}; - - -// An identifier, e.g. 'foo' or 'x'. -// This is a pseduo-term; it will be lowered to a variable or projection. +/// An identifier, e.g. 'foo' or 'x'. +/// This is a pseduo-term; it will be lowered to a variable or projection. class Identifier : public SExpr { public: static bool classof(const SExpr *E) { return E->opcode() == COP_Identifier; } @@ -1742,8 +1810,8 @@ private: }; -// An if-then-else expression. -// This is a pseduo-term; it will be lowered to a branch in a CFG. +/// An if-then-else expression. +/// This is a pseduo-term; it will be lowered to a branch in a CFG. class IfThenElse : public SExpr { public: static bool classof(const SExpr *E) { return E->opcode() == COP_IfThenElse; } @@ -1755,14 +1823,14 @@ public: : SExpr(I), Condition(C), ThenExpr(T), ElseExpr(E) { } - SExpr *condition() { return Condition.get(); } // Address to store to - const SExpr *condition() const { return Condition.get(); } + SExpr *condition() { return Condition; } // Address to store to + const SExpr *condition() const { return Condition; } - SExpr *thenExpr() { return ThenExpr.get(); } // Value to store - const SExpr *thenExpr() const { return ThenExpr.get(); } + SExpr *thenExpr() { return ThenExpr; } // Value to store + const SExpr *thenExpr() const { return ThenExpr; } - SExpr *elseExpr() { return ElseExpr.get(); } // Value to store - const SExpr *elseExpr() const { return ElseExpr.get(); } + SExpr *elseExpr() { return ElseExpr; } // Value to store + const SExpr *elseExpr() const { return ElseExpr; } template typename V::R_SExpr traverse(V &Vs, typename V::R_Ctx Ctx) { @@ -1784,14 +1852,14 @@ public: } private: - SExprRef Condition; - SExprRef ThenExpr; - SExprRef ElseExpr; + SExpr* Condition; + SExpr* ThenExpr; + SExpr* ElseExpr; }; -// A let-expression, e.g. let x=t; u. -// This is a pseduo-term; it will be lowered to instructions in a CFG. +/// A let-expression, e.g. let x=t; u. +/// This is a pseduo-term; it will be lowered to instructions in a CFG. class Let : public SExpr { public: static bool classof(const SExpr *E) { return E->opcode() == COP_Let; } @@ -1806,8 +1874,8 @@ public: Variable *variableDecl() { return VarDecl; } const Variable *variableDecl() const { return VarDecl; } - SExpr *body() { return Body.get(); } - const SExpr *body() const { return Body.get(); } + SExpr *body() { return Body; } + const SExpr *body() const { return Body; } template typename V::R_SExpr traverse(V &Vs, typename V::R_Ctx Ctx) { @@ -1834,14 +1902,14 @@ public: private: Variable *VarDecl; - SExprRef Body; + SExpr* Body; }; const SExpr *getCanonicalVal(const SExpr *E); SExpr* simplifyToCanonicalVal(SExpr *E); -void simplifyIncompleteArg(Variable *V, til::Phi *Ph); +void simplifyIncompleteArg(til::Phi *Ph); } // end namespace til diff --git a/include/clang/Analysis/Analyses/ThreadSafetyTraverse.h b/include/clang/Analysis/Analyses/ThreadSafetyTraverse.h index b21ad4336c..541f0bfd17 100644 --- a/include/clang/Analysis/Analyses/ThreadSafetyTraverse.h +++ b/include/clang/Analysis/Analyses/ThreadSafetyTraverse.h @@ -58,11 +58,16 @@ public: // Traverse an expression -- returning a result of type R_SExpr. // Override this method to do something for every expression, regardless // of which kind it is. - typename R::R_SExpr traverse(SExprRef &E, typename R::R_Ctx Ctx) { - return traverse(E.get(), Ctx); + // E is a reference, so this can be use for in-place updates. + // The type T must be a subclass of SExpr. + template + typename R::R_SExpr traverse(T* &E, typename R::R_Ctx Ctx) { + return traverseSExpr(E, Ctx); } - typename R::R_SExpr traverse(SExpr *E, typename R::R_Ctx Ctx) { + // Override this method to do something for every expression. + // Does not allow in-place updates. + typename R::R_SExpr traverseSExpr(SExpr *E, typename R::R_Ctx Ctx) { return traverseByCase(E, Ctx); } @@ -75,6 +80,7 @@ public: #include "ThreadSafetyOps.def" #undef TIL_OPCODE_DEF } + return self()->reduceNull(); } // Traverse e, by static dispatch on the type "X" of e. @@ -92,10 +98,10 @@ public: class SimpleReducerBase { public: enum TraversalKind { - TRV_Normal, - TRV_Decl, - TRV_Lazy, - TRV_Type + TRV_Normal, // ordinary subexpressions + TRV_Decl, // declarations (e.g. function bodies) + TRV_Lazy, // expressions that require lazy evaluation + TRV_Type // type expressions }; // R_Ctx defines a "context" for the traversal, which encodes information @@ -147,153 +153,6 @@ protected: }; -// Implements a traversal that makes a deep copy of an SExpr. -// The default behavior of reduce##X(...) is to create a copy of the original. -// Subclasses can override reduce##X to implement non-destructive rewriting -// passes. -template -class CopyReducer : public Traversal, - public CopyReducerBase { -public: - CopyReducer(MemRegionRef A) : CopyReducerBase(A) {} - -public: - R_SExpr reduceNull() { - return nullptr; - } - // R_SExpr reduceFuture(...) is never used. - - R_SExpr reduceUndefined(Undefined &Orig) { - return new (Arena) Undefined(Orig); - } - R_SExpr reduceWildcard(Wildcard &Orig) { - return new (Arena) Wildcard(Orig); - } - - R_SExpr reduceLiteral(Literal &Orig) { - return new (Arena) Literal(Orig); - } - template - R_SExpr reduceLiteralT(LiteralT &Orig) { - return new (Arena) LiteralT(Orig); - } - R_SExpr reduceLiteralPtr(LiteralPtr &Orig) { - return new (Arena) LiteralPtr(Orig); - } - - R_SExpr reduceFunction(Function &Orig, Variable *Nvd, R_SExpr E0) { - return new (Arena) Function(Orig, Nvd, E0); - } - R_SExpr reduceSFunction(SFunction &Orig, Variable *Nvd, R_SExpr E0) { - return new (Arena) SFunction(Orig, Nvd, E0); - } - R_SExpr reduceCode(Code &Orig, R_SExpr E0, R_SExpr E1) { - return new (Arena) Code(Orig, E0, E1); - } - R_SExpr reduceField(Field &Orig, R_SExpr E0, R_SExpr E1) { - return new (Arena) Field(Orig, E0, E1); - } - - R_SExpr reduceApply(Apply &Orig, R_SExpr E0, R_SExpr E1) { - return new (Arena) Apply(Orig, E0, E1); - } - R_SExpr reduceSApply(SApply &Orig, R_SExpr E0, R_SExpr E1) { - return new (Arena) SApply(Orig, E0, E1); - } - R_SExpr reduceProject(Project &Orig, R_SExpr E0) { - return new (Arena) Project(Orig, E0); - } - R_SExpr reduceCall(Call &Orig, R_SExpr E0) { - return new (Arena) Call(Orig, E0); - } - - R_SExpr reduceAlloc(Alloc &Orig, R_SExpr E0) { - return new (Arena) Alloc(Orig, E0); - } - R_SExpr reduceLoad(Load &Orig, R_SExpr E0) { - return new (Arena) Load(Orig, E0); - } - R_SExpr reduceStore(Store &Orig, R_SExpr E0, R_SExpr E1) { - return new (Arena) Store(Orig, E0, E1); - } - R_SExpr reduceArrayIndex(ArrayIndex &Orig, R_SExpr E0, R_SExpr E1) { - return new (Arena) ArrayIndex(Orig, E0, E1); - } - R_SExpr reduceArrayAdd(ArrayAdd &Orig, R_SExpr E0, R_SExpr E1) { - return new (Arena) ArrayAdd(Orig, E0, E1); - } - R_SExpr reduceUnaryOp(UnaryOp &Orig, R_SExpr E0) { - return new (Arena) UnaryOp(Orig, E0); - } - R_SExpr reduceBinaryOp(BinaryOp &Orig, R_SExpr E0, R_SExpr E1) { - return new (Arena) BinaryOp(Orig, E0, E1); - } - R_SExpr reduceCast(Cast &Orig, R_SExpr E0) { - return new (Arena) Cast(Orig, E0); - } - - R_SExpr reduceSCFG(SCFG &Orig, Container &Bbs) { - return nullptr; // FIXME: implement CFG rewriting - } - R_BasicBlock reduceBasicBlock(BasicBlock &Orig, Container &As, - Container &Is, R_SExpr T) { - return nullptr; // FIXME: implement CFG rewriting - } - R_SExpr reducePhi(Phi &Orig, Container &As) { - return new (Arena) Phi(Orig, std::move(As.Elems)); - } - R_SExpr reduceGoto(Goto &Orig, BasicBlock *B) { - return new (Arena) Goto(Orig, B, 0); // FIXME: set index - } - R_SExpr reduceBranch(Branch &O, R_SExpr C, BasicBlock *B0, BasicBlock *B1) { - return new (Arena) Branch(O, C, B0, B1, 0, 0); // FIXME: set indices - } - - R_SExpr reduceIdentifier(Identifier &Orig) { - return new (Arena) Identifier(Orig); - } - R_SExpr reduceIfThenElse(IfThenElse &Orig, R_SExpr C, R_SExpr T, R_SExpr E) { - return new (Arena) IfThenElse(Orig, C, T, E); - } - R_SExpr reduceLet(Let &Orig, Variable *Nvd, R_SExpr B) { - return new (Arena) Let(Orig, Nvd, B); - } - - // Create a new variable from orig, and push it onto the lexical scope. - Variable *enterScope(Variable &Orig, R_SExpr E0) { - return new (Arena) Variable(Orig, E0); - } - // Exit the lexical scope of orig. - void exitScope(const Variable &Orig) {} - - void enterCFG(SCFG &Cfg) {} - void exitCFG(SCFG &Cfg) {} - void enterBasicBlock(BasicBlock &BB) {} - void exitBasicBlock(BasicBlock &BB) {} - - // Map Variable references to their rewritten definitions. - Variable *reduceVariableRef(Variable *Ovd) { return Ovd; } - - // Map BasicBlock references to their rewritten definitions. - BasicBlock *reduceBasicBlockRef(BasicBlock *Obb) { return Obb; } -}; - - -class SExprCopier : public CopyReducer { -public: - typedef SExpr *R_SExpr; - - SExprCopier(MemRegionRef A) : CopyReducer(A) { } - - // Create a copy of e in region a. - static SExpr *copy(SExpr *E, MemRegionRef A) { - SExprCopier Copier(A); - return Copier.traverse(E, TRV_Normal); - } -}; - - - // Base class for visit traversals. class VisitReducerBase : public SimpleReducerBase { public: @@ -368,8 +227,8 @@ public: R_SExpr reduceSCFG(SCFG &Orig, Container Bbs) { return Bbs.Success; } - R_BasicBlock reduceBasicBlock(BasicBlock &Orig, Container &As, - Container &Is, R_SExpr T) { + R_BasicBlock reduceBasicBlock(BasicBlock &Orig, Container &As, + Container &Is, R_SExpr T) { return (As.Success && Is.Success && T); } R_SExpr reducePhi(Phi &Orig, Container &As) { @@ -381,6 +240,9 @@ public: R_SExpr reduceBranch(Branch &O, R_SExpr C, BasicBlock *B0, BasicBlock *B1) { return C; } + R_SExpr reduceReturn(Return &O, R_SExpr E) { + return E; + } R_SExpr reduceIdentifier(Identifier &Orig) { return true; @@ -433,7 +295,7 @@ public: #include "ThreadSafetyOps.def" #undef TIL_OPCODE_DEF } - llvm_unreachable("invalid enum"); + return false; } }; @@ -514,9 +376,9 @@ public: -inline std::ostream& operator<<(std::ostream& SS, llvm::StringRef R) { - return SS.write(R.data(), R.size()); -} +// inline std::ostream& operator<<(std::ostream& SS, StringRef R) { +// return SS.write(R.data(), R.size()); +// } // Pretty printer for TIL expressions template @@ -587,6 +449,7 @@ protected: case COP_Phi: return Prec_Atom; case COP_Goto: return Prec_Atom; case COP_Branch: return Prec_Atom; + case COP_Return: return Prec_Other; case COP_Identifier: return Prec_Atom; case COP_IfThenElse: return Prec_Other; @@ -595,22 +458,29 @@ protected: return Prec_MAX; } - void printBlockLabel(StreamType & SS, const BasicBlock *BB, unsigned index) { + void printBlockLabel(StreamType & SS, const BasicBlock *BB, int index) { if (!BB) { SS << "BB_null"; return; } SS << "BB_"; SS << BB->blockID(); - SS << ":"; - SS << index; + if (index >= 0) { + SS << ":"; + SS << index; + } } - void printSExpr(const SExpr *E, StreamType &SS, unsigned P) { + + void printSExpr(const SExpr *E, StreamType &SS, unsigned P, bool Sub=true) { if (!E) { self()->printNull(SS); return; } + if (Sub && E->block() && E->opcode() != COP_Variable) { + SS << "_x" << E->id(); + return; + } if (self()->precedence(E) > P) { // Wrap expr in () if necessary. SS << "("; @@ -740,20 +610,11 @@ protected: SS << E->clangDecl()->getNameAsString(); } - void printVariable(const Variable *V, StreamType &SS, bool IsVarDecl = false) { - if (!IsVarDecl && Cleanup) { - const SExpr* E = getCanonicalVal(V); - if (E != V) { - printSExpr(E, SS, Prec_Atom); - return; - } - } - if (V->kind() == Variable::VK_LetBB) - SS << V->name() << V->getBlockID() << "_" << V->getID(); - else if (CStyle && V->kind() == Variable::VK_SFun) + void printVariable(const Variable *V, StreamType &SS, bool IsVarDecl=false) { + if (CStyle && V->kind() == Variable::VK_SFun) SS << "this"; else - SS << V->name() << V->getID(); + SS << V->name() << V->id(); } void printFunction(const Function *E, StreamType &SS, unsigned sugared = 0) { @@ -927,32 +788,38 @@ protected: newline(SS); } + + void printBBInstr(const SExpr *E, StreamType &SS) { + bool Sub = false; + if (E->opcode() == COP_Variable) { + auto *V = cast(E); + SS << "let " << V->name() << V->id() << " = "; + E = V->definition(); + Sub = true; + } + else if (E->opcode() != COP_Store) { + SS << "let _x" << E->id() << " = "; + } + self()->printSExpr(E, SS, Prec_MAX, Sub); + SS << ";"; + newline(SS); + } + void printBasicBlock(const BasicBlock *E, StreamType &SS) { SS << "BB_" << E->blockID() << ":"; if (E->parent()) SS << " BB_" << E->parent()->blockID(); newline(SS); - for (auto *A : E->arguments()) { - SS << "let "; - self()->printVariable(A, SS, true); - SS << " = "; - self()->printSExpr(A->definition(), SS, Prec_MAX); - SS << ";"; - newline(SS); - } - for (auto *I : E->instructions()) { - if (I->definition()->opcode() != COP_Store) { - SS << "let "; - self()->printVariable(I, SS, true); - SS << " = "; - } - self()->printSExpr(I->definition(), SS, Prec_MAX); - SS << ";"; - newline(SS); - } + + for (auto *A : E->arguments()) + printBBInstr(A, SS); + + for (auto *I : E->instructions()) + printBBInstr(I, SS); + const SExpr *T = E->terminator(); if (T) { - self()->printSExpr(T, SS, Prec_MAX); + self()->printSExpr(T, SS, Prec_MAX, false); SS << ";"; newline(SS); } @@ -983,9 +850,14 @@ protected: SS << "branch ("; self()->printSExpr(E->condition(), SS, Prec_MAX); SS << ") "; - printBlockLabel(SS, E->thenBlock(), E->thenIndex()); + printBlockLabel(SS, E->thenBlock(), -1); SS << " "; - printBlockLabel(SS, E->elseBlock(), E->elseIndex()); + printBlockLabel(SS, E->elseBlock(), -1); + } + + void printReturn(const Return *E, StreamType &SS) { + SS << "return "; + self()->printSExpr(E->returnValue(), SS, Prec_Other); } void printIdentifier(const Identifier *E, StreamType &SS) { diff --git a/include/clang/Analysis/Analyses/ThreadSafetyUtil.h b/include/clang/Analysis/Analyses/ThreadSafetyUtil.h index c0a5a7a94d..8c987b097c 100644 --- a/include/clang/Analysis/Analyses/ThreadSafetyUtil.h +++ b/include/clang/Analysis/Analyses/ThreadSafetyUtil.h @@ -142,20 +142,35 @@ public: assert(i < Size && "Array index out of bounds."); return Data[i]; } + T &back() { + assert(Size && "No elements in the array."); + return Data[Size - 1]; + } + const T &back() const { + assert(Size && "No elements in the array."); + return Data[Size - 1]; + } iterator begin() { return Data; } + iterator end() { return Data + Size; } + const_iterator begin() const { return Data; } - iterator end() { return Data + Size; } - const_iterator end() const { return Data + Size; } + const_iterator end() const { return Data + Size; } const_iterator cbegin() const { return Data; } - const_iterator cend() const { return Data + Size; } + const_iterator cend() const { return Data + Size; } void push_back(const T &Elem) { assert(Size < Capacity); Data[Size++] = Elem; } + // drop last n elements from array + void drop(unsigned n = 0) { + assert(Size > n); + Size -= n; + } + void setValues(unsigned Sz, const T& C) { assert(Sz <= Capacity); Size = Sz; @@ -173,6 +188,37 @@ public: return J - Osz; } + // An adaptor to reverse a simple array + class ReverseAdaptor { + public: + ReverseAdaptor(SimpleArray &Array) : Array(Array) {} + // A reverse iterator used by the reverse adaptor + class Iterator { + public: + Iterator(T *Data) : Data(Data) {} + T &operator*() { return *Data; } + const T &operator*() const { return *Data; } + Iterator &operator++() { + --Data; + return *this; + } + bool operator!=(Iterator Other) { return Data != Other.Data; } + + private: + T *Data; + }; + Iterator begin() { return Array.end() - 1; } + Iterator end() { return Array.begin() - 1; } + const Iterator begin() const { return Array.end() - 1; } + const Iterator end() const { return Array.begin() - 1; } + + private: + SimpleArray &Array; + }; + + const ReverseAdaptor reverse() const { return ReverseAdaptor(*this); } + ReverseAdaptor reverse() { return ReverseAdaptor(*this); } + private: // std::max is annoying here, because it requires a reference, // thus forcing InitialCapacity to be initialized outside the .h file. @@ -187,6 +233,7 @@ private: size_t Capacity; }; + } // end namespace til @@ -312,6 +359,12 @@ private: }; +inline std::ostream& operator<<(std::ostream& ss, const StringRef str) { + ss << str.data(); + return ss; +} + + } // end namespace threadSafety } // end namespace clang diff --git a/lib/Analysis/ThreadSafetyCommon.cpp b/lib/Analysis/ThreadSafetyCommon.cpp index e9b1f6413c..88a1cbfd4e 100644 --- a/lib/Analysis/ThreadSafetyCommon.cpp +++ b/lib/Analysis/ThreadSafetyCommon.cpp @@ -63,11 +63,9 @@ std::string getSourceLiteralString(const clang::Expr *CE) { namespace til { // Return true if E is a variable that points to an incomplete Phi node. -static bool isIncompleteVar(const SExpr *E) { - if (const auto *V = dyn_cast(E)) { - if (const auto *Ph = dyn_cast(V->definition())) - return Ph->status() == Phi::PH_Incomplete; - } +static bool isIncompletePhi(const SExpr *E) { + if (const auto *Ph = dyn_cast(E)) + return Ph->status() == Phi::PH_Incomplete; return false; } @@ -320,6 +318,8 @@ til::SExpr *SExprBuilder::translateCXXThisExpr(const CXXThisExpr *TE, const ValueDecl *getValueDeclFromSExpr(const til::SExpr *E) { if (auto *V = dyn_cast(E)) return V->clangDecl(); + if (auto *Ph = dyn_cast(E)) + return Ph->clangDecl(); if (auto *P = dyn_cast(E)) return P->clangDecl(); if (auto *L = dyn_cast(E)) @@ -641,14 +641,14 @@ SExprBuilder::translateDeclStmt(const DeclStmt *S, CallingContext *Ctx) { // If E is trivial returns E. til::SExpr *SExprBuilder::addStatement(til::SExpr* E, const Stmt *S, const ValueDecl *VD) { - if (!E || !CurrentBB || til::ThreadSafetyTIL::isTrivial(E)) + if (!E || !CurrentBB || E->block() || til::ThreadSafetyTIL::isTrivial(E)) return E; - - til::Variable *V = new (Arena) til::Variable(E, VD); - CurrentInstructions.push_back(V); + if (VD) + E = new (Arena) til::Variable(E, VD); + CurrentInstructions.push_back(E); if (S) - insertStmt(S, V); - return V; + insertStmt(S, E); + return E; } @@ -705,11 +705,11 @@ void SExprBuilder::makePhiNodeVar(unsigned i, unsigned NPreds, til::SExpr *E) { unsigned ArgIndex = CurrentBlockInfo->ProcessedPredecessors; assert(ArgIndex > 0 && ArgIndex < NPreds); - til::Variable *V = dyn_cast(CurrentLVarMap[i].second); - if (V && V->getBlockID() == CurrentBB->blockID()) { + til::SExpr *CurrE = CurrentLVarMap[i].second; + if (CurrE->block() == CurrentBB) { // We already have a Phi node in the current block, // so just add the new variable to the Phi node. - til::Phi *Ph = dyn_cast(V->definition()); + til::Phi *Ph = dyn_cast(CurrE); assert(Ph && "Expecting Phi node."); if (E) Ph->values()[ArgIndex] = E; @@ -718,27 +718,26 @@ void SExprBuilder::makePhiNodeVar(unsigned i, unsigned NPreds, til::SExpr *E) { // Make a new phi node: phi(..., E) // All phi args up to the current index are set to the current value. - til::SExpr *CurrE = CurrentLVarMap[i].second; til::Phi *Ph = new (Arena) til::Phi(Arena, NPreds); Ph->values().setValues(NPreds, nullptr); for (unsigned PIdx = 0; PIdx < ArgIndex; ++PIdx) Ph->values()[PIdx] = CurrE; if (E) Ph->values()[ArgIndex] = E; + Ph->setClangDecl(CurrentLVarMap[i].first); // If E is from a back-edge, or either E or CurrE are incomplete, then // mark this node as incomplete; we may need to remove it later. - if (!E || isIncompleteVar(E) || isIncompleteVar(CurrE)) { + if (!E || isIncompletePhi(E) || isIncompletePhi(CurrE)) { Ph->setStatus(til::Phi::PH_Incomplete); } // Add Phi node to current block, and update CurrentLVarMap[i] - auto *Var = new (Arena) til::Variable(Ph, CurrentLVarMap[i].first); - CurrentArguments.push_back(Var); + CurrentArguments.push_back(Ph); if (Ph->status() == til::Phi::PH_Incomplete) - IncompleteArgs.push_back(Var); + IncompleteArgs.push_back(Ph); CurrentLVarMap.makeWritable(); - CurrentLVarMap.elem(i).second = Var; + CurrentLVarMap.elem(i).second = Ph; } @@ -812,15 +811,13 @@ void SExprBuilder::mergePhiNodesBackEdge(const CFGBlock *Blk) { unsigned ArgIndex = BBInfo[Blk->getBlockID()].ProcessedPredecessors; assert(ArgIndex > 0 && ArgIndex < BB->numPredecessors()); - for (til::Variable *V : BB->arguments()) { - til::Phi *Ph = dyn_cast_or_null(V->definition()); + for (til::SExpr *PE : BB->arguments()) { + til::Phi *Ph = dyn_cast_or_null(PE); assert(Ph && "Expecting Phi Node."); assert(Ph->values()[ArgIndex] == nullptr && "Wrong index for back edge."); - assert(V->clangDecl() && "No local variable for Phi node."); - til::SExpr *E = lookupVarDecl(V->clangDecl()); + til::SExpr *E = lookupVarDecl(Ph->clangDecl()); assert(E && "Couldn't find local variable for Phi node."); - Ph->values()[ArgIndex] = E; } } @@ -899,8 +896,8 @@ void SExprBuilder::enterCFGBlockBody(const CFGBlock *B) { // Push those arguments onto the basic block. CurrentBB->arguments().reserve( static_cast(CurrentArguments.size()), Arena); - for (auto *V : CurrentArguments) - CurrentBB->addArgument(V); + for (auto *A : CurrentArguments) + CurrentBB->addArgument(A); } @@ -934,7 +931,7 @@ void SExprBuilder::exitCFGBlockBody(const CFGBlock *B) { til::BasicBlock *BB = *It ? lookupBlock(*It) : nullptr; // TODO: set index unsigned Idx = BB ? BB->findPredecessorIndex(CurrentBB) : 0; - til::SExpr *Tm = new (Arena) til::Goto(BB, Idx); + auto *Tm = new (Arena) til::Goto(BB, Idx); CurrentBB->setTerminator(Tm); } else if (N == 2) { @@ -942,9 +939,8 @@ void SExprBuilder::exitCFGBlockBody(const CFGBlock *B) { til::BasicBlock *BB1 = *It ? lookupBlock(*It) : nullptr; ++It; til::BasicBlock *BB2 = *It ? lookupBlock(*It) : nullptr; - unsigned Idx1 = BB1 ? BB1->findPredecessorIndex(CurrentBB) : 0; - unsigned Idx2 = BB2 ? BB2->findPredecessorIndex(CurrentBB) : 0; - til::SExpr *Tm = new (Arena) til::Branch(C, BB1, BB2, Idx1, Idx2); + // FIXME: make sure these arent' critical edges. + auto *Tm = new (Arena) til::Branch(C, BB1, BB2); CurrentBB->setTerminator(Tm); } } @@ -971,10 +967,9 @@ void SExprBuilder::exitCFGBlock(const CFGBlock *B) { void SExprBuilder::exitCFG(const CFGBlock *Last) { - for (auto *V : IncompleteArgs) { - til::Phi *Ph = dyn_cast(V->definition()); - if (Ph && Ph->status() == til::Phi::PH_Incomplete) - simplifyIncompleteArg(V, Ph); + for (auto *Ph : IncompleteArgs) { + if (Ph->status() == til::Phi::PH_Incomplete) + simplifyIncompleteArg(Ph); } CurrentArguments.clear(); diff --git a/lib/Analysis/ThreadSafetyTIL.cpp b/lib/Analysis/ThreadSafetyTIL.cpp index 0bb7d4c2db..a15063631d 100644 --- a/lib/Analysis/ThreadSafetyTIL.cpp +++ b/lib/Analysis/ThreadSafetyTIL.cpp @@ -48,12 +48,20 @@ StringRef getBinaryOpcodeString(TIL_BinaryOpcode Op) { } +SExpr* Future::force() { + Status = FS_evaluating; + Result = compute(); + Status = FS_done; + return Result; +} + + unsigned BasicBlock::addPredecessor(BasicBlock *Pred) { unsigned Idx = Predecessors.size(); Predecessors.reserveCheck(1, Arena); Predecessors.push_back(Pred); - for (Variable *V : Args) { - if (Phi* Ph = dyn_cast(V->definition())) { + for (SExpr *E : Args) { + if (Phi* Ph = dyn_cast(E)) { Ph->values().reserveCheck(1, Arena); Ph->values().push_back(nullptr); } @@ -61,105 +69,73 @@ unsigned BasicBlock::addPredecessor(BasicBlock *Pred) { return Idx; } + void BasicBlock::reservePredecessors(unsigned NumPreds) { Predecessors.reserve(NumPreds, Arena); - for (Variable *V : Args) { - if (Phi* Ph = dyn_cast(V->definition())) { + for (SExpr *E : Args) { + if (Phi* Ph = dyn_cast(E)) { Ph->values().reserve(NumPreds, Arena); } } } -void BasicBlock::renumberVars() { - unsigned VID = 0; - for (Variable *V : Args) { - V->setID(BlockID, VID++); - } - for (Variable *V : Instrs) { - V->setID(BlockID, VID++); - } -} - -void SCFG::renumberVars() { - for (BasicBlock *B : Blocks) { - B->renumberVars(); - } -} - - // If E is a variable, then trace back through any aliases or redundant // Phi nodes to find the canonical definition. const SExpr *getCanonicalVal(const SExpr *E) { - while (auto *V = dyn_cast(E)) { - const SExpr *D; - do { - if (V->kind() != Variable::VK_Let) - return V; - D = V->definition(); - auto *V2 = dyn_cast(D); - if (V2) - V = V2; - else - break; - } while (true); - - if (ThreadSafetyTIL::isTrivial(D)) - return D; - - if (const Phi *Ph = dyn_cast(D)) { + while (true) { + if (auto *V = dyn_cast(E)) { + if (V->kind() == Variable::VK_Let) { + E = V->definition(); + continue; + } + } + if (const Phi *Ph = dyn_cast(E)) { if (Ph->status() == Phi::PH_SingleVal) { E = Ph->values()[0]; continue; } } - return V; + break; } return E; } - // If E is a variable, then trace back through any aliases or redundant // Phi nodes to find the canonical definition. // The non-const version will simplify incomplete Phi nodes. SExpr *simplifyToCanonicalVal(SExpr *E) { - while (auto *V = dyn_cast(E)) { - SExpr *D; - do { + while (true) { + if (auto *V = dyn_cast(E)) { if (V->kind() != Variable::VK_Let) return V; - D = V->definition(); - auto *V2 = dyn_cast(D); - if (V2) - V = V2; - else - break; - } while (true); - - if (ThreadSafetyTIL::isTrivial(D)) - return D; - - if (Phi *Ph = dyn_cast(D)) { + // Eliminate redundant variables, e.g. x = y, or x = 5, + // but keep anything more complicated. + if (til::ThreadSafetyTIL::isTrivial(V->definition())) { + E = V->definition(); + continue; + } + return V; + } + if (auto *Ph = dyn_cast(E)) { if (Ph->status() == Phi::PH_Incomplete) - simplifyIncompleteArg(V, Ph); - + simplifyIncompleteArg(Ph); + // Eliminate redundant Phi nodes. if (Ph->status() == Phi::PH_SingleVal) { E = Ph->values()[0]; continue; } } - return V; + return E; } - return E; } - // Trace the arguments of an incomplete Phi node to see if they have the same // canonical definition. If so, mark the Phi node as redundant. // getCanonicalVal() will recursively call simplifyIncompletePhi(). -void simplifyIncompleteArg(Variable *V, til::Phi *Ph) { +void simplifyIncompleteArg(til::Phi *Ph) { assert(Ph && Ph->status() == Phi::PH_Incomplete); // eliminate infinite recursion -- assume that this node is not redundant. @@ -168,18 +144,200 @@ void simplifyIncompleteArg(Variable *V, til::Phi *Ph) { SExpr *E0 = simplifyToCanonicalVal(Ph->values()[0]); for (unsigned i=1, n=Ph->values().size(); ivalues()[i]); - if (Ei == V) + if (Ei == Ph) continue; // Recursive reference to itself. Don't count. if (Ei != E0) { return; // Status is already set to MultiVal. } } Ph->setStatus(Phi::PH_SingleVal); - // Eliminate Redundant Phi node. - V->setDefinition(Ph->values()[0]); } +// Renumbers the arguments and instructions to have unique, sequential IDs. +int BasicBlock::renumberInstrs(int ID) { + for (auto *Arg : Args) + Arg->setID(this, ID++); + for (auto *Instr : Instrs) + Instr->setID(this, ID++); + TermInstr->setID(this, ID++); + return ID; +} + +// Sorts the CFGs blocks using a reverse post-order depth-first traversal. +// Each block will be written into the Blocks array in order, and its BlockID +// will be set to the index in the array. Sorting should start from the entry +// block, and ID should be the total number of blocks. +int BasicBlock::topologicalSort(SimpleArray& Blocks, int ID) { + if (Visited) return ID; + Visited = 1; + for (auto *Block : successors()) + ID = Block->topologicalSort(Blocks, ID); + // set ID and update block array in place. + // We may lose pointers to unreachable blocks. + assert(ID > 0); + BlockID = --ID; + Blocks[BlockID] = this; + return ID; +} + +// Performs a reverse topological traversal, starting from the exit block and +// following back-edges. The dominator is serialized before any predecessors, +// which guarantees that all blocks are serialized after their dominator and +// before their post-dominator (because it's a reverse topological traversal). +// ID should be initially set to 0. +// +// This sort assumes that (1) dominators have been computed, (2) there are no +// critical edges, and (3) the entry block is reachable from the exit block +// and no blocks are accessable via traversal of back-edges from the exit that +// weren't accessable via forward edges from the entry. +int BasicBlock::topologicalFinalSort(SimpleArray& Blocks, int ID) { + // Visited is assumed to have been set by the topologicalSort. This pass + // assumes !Visited means that we've visited this node before. + if (!Visited) return ID; + Visited = 0; + if (DominatorNode.Parent) + ID = DominatorNode.Parent->topologicalFinalSort(Blocks, ID); + for (auto *Pred : Predecessors) + ID = Pred->topologicalFinalSort(Blocks, ID); + assert(ID < Blocks.size()); + BlockID = ID++; + Blocks[BlockID] = this; + return ID; +} + +// Computes the immediate dominator of the current block. Assumes that all of +// its predecessors have already computed their dominators. This is achieved +// by visiting the nodes in topological order. +void BasicBlock::computeDominator() { + BasicBlock *Candidate = nullptr; + // Walk backwards from each predecessor to find the common dominator node. + for (auto *Pred : Predecessors) { + // Skip back-edges + if (Pred->BlockID >= BlockID) continue; + // If we don't yet have a candidate for dominator yet, take this one. + if (Candidate == nullptr) { + Candidate = Pred; + continue; + } + // Walk the alternate and current candidate back to find a common ancestor. + auto *Alternate = Pred; + while (Alternate != Candidate) { + if (Candidate->BlockID > Alternate->BlockID) + Candidate = Candidate->DominatorNode.Parent; + else + Alternate = Alternate->DominatorNode.Parent; + } + } + DominatorNode.Parent = Candidate; + DominatorNode.SizeOfSubTree = 1; +} + +// Computes the immediate post-dominator of the current block. Assumes that all +// of its successors have already computed their post-dominators. This is +// achieved visiting the nodes in reverse topological order. +void BasicBlock::computePostDominator() { + BasicBlock *Candidate = nullptr; + // Walk back from each predecessor to find the common post-dominator node. + for (auto *Succ : successors()) { + // Skip back-edges + if (Succ->BlockID <= BlockID) continue; + // If we don't yet have a candidate for post-dominator yet, take this one. + if (Candidate == nullptr) { + Candidate = Succ; + continue; + } + // Walk the alternate and current candidate back to find a common ancestor. + auto *Alternate = Succ; + while (Alternate != Candidate) { + if (Candidate->BlockID < Alternate->BlockID) + Candidate = Candidate->PostDominatorNode.Parent; + else + Alternate = Alternate->PostDominatorNode.Parent; + } + } + PostDominatorNode.Parent = Candidate; + PostDominatorNode.SizeOfSubTree = 1; +} + + +// Renumber instructions in all blocks +void SCFG::renumberInstrs() { + int InstrID = 0; + for (auto *Block : Blocks) + InstrID = Block->renumberInstrs(InstrID); +} + + +static inline void computeNodeSize(BasicBlock *B, + BasicBlock::TopologyNode BasicBlock::*TN) { + BasicBlock::TopologyNode *N = &(B->*TN); + if (N->Parent) { + BasicBlock::TopologyNode *P = &(N->Parent->*TN); + // Initially set ID relative to the (as yet uncomputed) parent ID + N->NodeID = P->SizeOfSubTree; + P->SizeOfSubTree += N->SizeOfSubTree; + } +} + +static inline void computeNodeID(BasicBlock *B, + BasicBlock::TopologyNode BasicBlock::*TN) { + BasicBlock::TopologyNode *N = &(B->*TN); + if (N->Parent) { + BasicBlock::TopologyNode *P = &(N->Parent->*TN); + N->NodeID += P->NodeID; // Fix NodeIDs relative to starting node. + } +} + + +// Normalizes a CFG. Normalization has a few major components: +// 1) Removing unreachable blocks. +// 2) Computing dominators and post-dominators +// 3) Topologically sorting the blocks into the "Blocks" array. +void SCFG::computeNormalForm() { + // Topologically sort the blocks starting from the entry block. + int NumUnreachableBlocks = Entry->topologicalSort(Blocks, Blocks.size()); + if (NumUnreachableBlocks > 0) { + // If there were unreachable blocks shift everything down, and delete them. + for (size_t I = NumUnreachableBlocks, E = Blocks.size(); I < E; ++I) { + size_t NI = I - NumUnreachableBlocks; + Blocks[NI] = Blocks[I]; + Blocks[NI]->BlockID = NI; + // FIXME: clean up predecessor pointers to unreachable blocks? + } + Blocks.drop(NumUnreachableBlocks); + } + + // Compute dominators. + for (auto *Block : Blocks) + Block->computeDominator(); + + // Once dominators have been computed, the final sort may be performed. + int NumBlocks = Exit->topologicalFinalSort(Blocks, 0); + assert(NumBlocks == Blocks.size()); + (void) NumBlocks; + + // Renumber the instructions now that we have a final sort. + renumberInstrs(); + + // Compute post-dominators and compute the sizes of each node in the + // dominator tree. + for (auto *Block : Blocks.reverse()) { + Block->computePostDominator(); + computeNodeSize(Block, &BasicBlock::DominatorNode); + } + // Compute the sizes of each node in the post-dominator tree and assign IDs in + // the dominator tree. + for (auto *Block : Blocks) { + computeNodeID(Block, &BasicBlock::DominatorNode); + computeNodeSize(Block, &BasicBlock::PostDominatorNode); + } + // Assign IDs in the post-dominator tree. + for (auto *Block : Blocks.reverse()) { + computeNodeID(Block, &BasicBlock::PostDominatorNode); + } +} + } // end namespace til } // end namespace threadSafety } // end namespace clang