1 //===- SymbolManager.h - Management of Symbolic Values ----------*- C++ -*-===//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
9 // This file defines SymbolManager, a class that manages symbolic values
10 // created for use by ExprEngine and related classes.
12 //===----------------------------------------------------------------------===//
14 #ifndef LLVM_CLANG_STATICANALYZER_CORE_PATHSENSITIVE_SYMBOLMANAGER_H
15 #define LLVM_CLANG_STATICANALYZER_CORE_PATHSENSITIVE_SYMBOLMANAGER_H
17 #include "clang/AST/Expr.h"
18 #include "clang/AST/Type.h"
19 #include "clang/Analysis/AnalysisDeclContext.h"
20 #include "clang/Basic/LLVM.h"
21 #include "clang/StaticAnalyzer/Core/PathSensitive/MemRegion.h"
22 #include "clang/StaticAnalyzer/Core/PathSensitive/StoreRef.h"
23 #include "clang/StaticAnalyzer/Core/PathSensitive/SymExpr.h"
24 #include "llvm/ADT/DenseMap.h"
25 #include "llvm/ADT/DenseSet.h"
26 #include "llvm/ADT/FoldingSet.h"
27 #include "llvm/Support/Allocator.h"
37 class BasicValueFactory;
40 /// A symbol representing the value stored at a MemRegion.
41 class SymbolRegionValue : public SymbolData {
// The region this symbol was created for; exposed through getRegion() and
// getOriginRegion().
42 const TypedValueRegion *R;
// Creates the symbol with id `sym` for region `r`. Asserts that the region's
// value type is accepted by isValidTypeForSymbol.
45 SymbolRegionValue(SymbolID sym, const TypedValueRegion *r)
46 : SymbolData(SymbolRegionValueKind, sym), R(r) {
48 assert(isValidTypeForSymbol(r->getValueType()));
51 const TypedValueRegion* getRegion() const { return R; }
// Folding-set profile: the symbol kind followed by the region pointer.
53 static void Profile(llvm::FoldingSetNodeID& profile, const TypedValueRegion* R) {
54 profile.AddInteger((unsigned) SymbolRegionValueKind);
55 profile.AddPointer(R);
// Instance overload of Profile (body not visible in this view).
58 void Profile(llvm::FoldingSetNodeID& profile) override {
62 void dumpToStream(raw_ostream &os) const override;
// The origin region is simply the region returned by getRegion().
63 const MemRegion *getOriginRegion() const override { return getRegion(); }
65 QualType getType() const override;
67 // Implement isa<T> support.
68 static bool classof(const SymExpr *SE) {
69 return SE->getKind() == SymbolRegionValueKind;
73 /// A symbol representing the result of an expression in the case when we do
74 /// not know anything about what the expression is.
75 class SymbolConjured : public SymbolData {
// NOTE(review): the constructor below also initialises members S, T and
// Count; their declarations are not visible in this view.
79 const LocationContext *LCtx;
// Opaque tag pointer supplied by the creator; returned by getTag().
80 const void *SymbolTag;
// Stores the statement, type, count, location context and tag as given.
// Asserts that `t` is accepted by isValidTypeForSymbol.
83 SymbolConjured(SymbolID sym, const Stmt *s, const LocationContext *lctx,
84 QualType t, unsigned count, const void *symbolTag)
85 : SymbolData(SymbolConjuredKind, sym), S(s), T(t), Count(count),
86 LCtx(lctx), SymbolTag(symbolTag) {
87 // FIXME: 's' might be a nullptr if we're conducting invalidation
88 // that was caused by a destructor call on a temporary object,
89 // which has no statement associated with it.
90 // Due to this, we might be creating the same invalidation symbol for
91 // two different invalidation passes (for two different temporaries).
93 assert(isValidTypeForSymbol(t));
96 const Stmt *getStmt() const { return S; }
97 unsigned getCount() const { return Count; }
98 const void *getTag() const { return SymbolTag; }
100 QualType getType() const override;
102 void dumpToStream(raw_ostream &os) const override;
// Folding-set profile. The lines visible here add the symbol kind, S, LCtx,
// Count and SymbolTag; how T is added is not visible in this view.
104 static void Profile(llvm::FoldingSetNodeID& profile, const Stmt *S,
105 QualType T, unsigned Count, const LocationContext *LCtx,
106 const void *SymbolTag) {
107 profile.AddInteger((unsigned) SymbolConjuredKind);
108 profile.AddPointer(S);
109 profile.AddPointer(LCtx);
111 profile.AddInteger(Count);
112 profile.AddPointer(SymbolTag);
// Profiles this instance by forwarding its own fields to the static overload.
115 void Profile(llvm::FoldingSetNodeID& profile) override {
116 Profile(profile, S, T, Count, LCtx, SymbolTag);
119 // Implement isa<T> support.
120 static bool classof(const SymExpr *SE) {
121 return SE->getKind() == SymbolConjuredKind;
125 /// A symbol representing the value of a MemRegion whose parent region has
127 class SymbolDerived : public SymbolData {
// The symbol this one is derived from; returned by getParentSymbol().
128 SymbolRef parentSymbol;
// The region this symbol was created for; returned by getRegion() and
// getOriginRegion().
129 const TypedValueRegion *R;
// Stores the parent symbol and region. Asserts that the region's value type
// is accepted by isValidTypeForSymbol.
132 SymbolDerived(SymbolID sym, SymbolRef parent, const TypedValueRegion *r)
133 : SymbolData(SymbolDerivedKind, sym), parentSymbol(parent), R(r) {
136 assert(isValidTypeForSymbol(r->getValueType()));
139 SymbolRef getParentSymbol() const { return parentSymbol; }
140 const TypedValueRegion *getRegion() const { return R; }
142 QualType getType() const override;
144 void dumpToStream(raw_ostream &os) const override;
// The origin region is simply the region returned by getRegion().
145 const MemRegion *getOriginRegion() const override { return getRegion(); }
// Folding-set profile: symbol kind, then the region pointer, then the parent
// symbol pointer (note the region comes before the parent).
147 static void Profile(llvm::FoldingSetNodeID& profile, SymbolRef parent,
148 const TypedValueRegion *r) {
149 profile.AddInteger((unsigned) SymbolDerivedKind);
150 profile.AddPointer(r);
151 profile.AddPointer(parent);
// Profiles this instance by forwarding its own fields to the static overload.
154 void Profile(llvm::FoldingSetNodeID& profile) override {
155 Profile(profile, parentSymbol, R);
158 // Implement isa<T> support.
159 static bool classof(const SymExpr *SE) {
160 return SE->getKind() == SymbolDerivedKind;
164 /// SymbolExtent - Represents the extent (size in bytes) of a bounded region.
165 /// Clients should not ask the SymbolManager for a region's extent. Always use
166 /// SubRegion::getExtent instead -- the value returned may not be a symbol.
167 class SymbolExtent : public SymbolData {
// Stores the sub-region `r` in member R (the member declaration itself is not
// visible in this view).
171 SymbolExtent(SymbolID sym, const SubRegion *r)
172 : SymbolData(SymbolExtentKind, sym), R(r) {
176 const SubRegion *getRegion() const { return R; }
178 QualType getType() const override;
180 void dumpToStream(raw_ostream &os) const override;
// Folding-set profile: the symbol kind followed by the region pointer.
182 static void Profile(llvm::FoldingSetNodeID& profile, const SubRegion *R) {
183 profile.AddInteger((unsigned) SymbolExtentKind);
184 profile.AddPointer(R);
// Instance overload of Profile (body not visible in this view).
187 void Profile(llvm::FoldingSetNodeID& profile) override {
191 // Implement isa<T> support.
192 static bool classof(const SymExpr *SE) {
193 return SE->getKind() == SymbolExtentKind;
197 /// SymbolMetadata - Represents path-dependent metadata about a specific region.
198 /// Metadata symbols remain live as long as they are marked as in use before
199 /// dead-symbol sweeping AND their associated regions are still alive.
200 /// Intended for use by checkers.
201 class SymbolMetadata : public SymbolData {
// NOTE(review): the constructor below also initialises members R, S, T, Count
// and Tag; their declarations are not visible in this view.
205 const LocationContext *LCtx;
// Stores the region, statement, type, location context, count and tag as
// given. Asserts that `t` is accepted by isValidTypeForSymbol.
210 SymbolMetadata(SymbolID sym, const MemRegion* r, const Stmt *s, QualType t,
211 const LocationContext *LCtx, unsigned count, const void *tag)
212 : SymbolData(SymbolMetadataKind, sym), R(r), S(s), T(t), LCtx(LCtx),
213 Count(count), Tag(tag) {
216 assert(isValidTypeForSymbol(t));
221 const MemRegion *getRegion() const { return R; }
222 const Stmt *getStmt() const { return S; }
223 const LocationContext *getLocationContext() const { return LCtx; }
224 unsigned getCount() const { return Count; }
225 const void *getTag() const { return Tag; }
227 QualType getType() const override;
229 void dumpToStream(raw_ostream &os) const override;
// Folding-set profile. The lines visible here add the symbol kind, R, S,
// LCtx, Count and Tag; how T is added is not visible in this view.
231 static void Profile(llvm::FoldingSetNodeID& profile, const MemRegion *R,
232 const Stmt *S, QualType T, const LocationContext *LCtx,
233 unsigned Count, const void *Tag) {
234 profile.AddInteger((unsigned) SymbolMetadataKind);
235 profile.AddPointer(R);
236 profile.AddPointer(S);
238 profile.AddPointer(LCtx);
239 profile.AddInteger(Count);
240 profile.AddPointer(Tag);
// Profiles this instance by forwarding its own fields to the static overload.
243 void Profile(llvm::FoldingSetNodeID& profile) override {
244 Profile(profile, R, S, T, LCtx, Count, Tag);
247 // Implement isa<T> support.
248 static bool classof(const SymExpr *SE) {
249 return SE->getKind() == SymbolMetadataKind;
253 /// Represents a cast expression.
254 class SymbolCast : public SymExpr {
// The expression being cast; returned by getOperand().
255 const SymExpr *Operand;
257 /// Type of the operand.
260 /// The type of the result.
// Stores the operand together with its source type (FromTy) and result type
// (ToTy). Only `From` is asserted valid here; see the FIXME below for `To`.
264 SymbolCast(const SymExpr *In, QualType From, QualType To)
265 : SymExpr(SymbolCastKind), Operand(In), FromTy(From), ToTy(To) {
267 assert(isValidTypeForSymbol(From));
268 // FIXME: GenericTaintChecker creates symbols of void type.
269 // Otherwise, 'To' should also be a valid type.
// Sets Complexity to one more than the operand's complexity. Any guard around
// this assignment and the return statement are not visible in this view.
272 unsigned computeComplexity() const override {
274 Complexity = 1 + Operand->computeComplexity();
// The type of a cast expression is the type being cast to.
278 QualType getType() const override { return ToTy; }
280 const SymExpr *getOperand() const { return Operand; }
282 void dumpToStream(raw_ostream &os) const override;
// Folding-set profile. Only the line adding the symbol kind is visible in
// this view; the remaining statements are elided.
284 static void Profile(llvm::FoldingSetNodeID& ID,
285 const SymExpr *In, QualType From, QualType To) {
286 ID.AddInteger((unsigned) SymbolCastKind);
// Profiles this instance by forwarding its own fields to the static overload.
292 void Profile(llvm::FoldingSetNodeID& ID) override {
293 Profile(ID, Operand, FromTy, ToTy);
296 // Implement isa<T> support.
297 static bool classof(const SymExpr *SE) {
298 return SE->getKind() == SymbolCastKind;
302 /// Represents a symbolic expression involving a binary operator
303 class BinarySymExpr : public SymExpr {
// The binary operator of this expression; returned by getOpcode().
304 BinaryOperator::Opcode Op;
// Stores the kind, opcode and result type. Asserts that the kind passes
// classof() below, and that `t` is a valid symbol type that is not a Loc type.
308 BinarySymExpr(Kind k, BinaryOperator::Opcode op, QualType t)
309 : SymExpr(k), Op(op), T(t) {
310 assert(classof(this));
311 // Binary expressions are results of arithmetic. Pointer arithmetic is not
312 // handled by binary expressions, but it is instead handled by applying
313 // sub-regions to regions.
314 assert(isValidTypeForSymbol(t) && !Loc::isLocType(t));
318 // FIXME: We probably need to make this out-of-line to avoid redundant
319 // generation of virtual functions.
320 QualType getType() const override { return T; }
322 BinaryOperator::Opcode getOpcode() const { return Op; }
324 // Implement isa<T> support.
// True when the kind lies in the inclusive range
// [BEGIN_BINARYSYMEXPRS, END_BINARYSYMEXPRS].
325 static bool classof(const SymExpr *SE) {
326 Kind k = SE->getKind();
327 return k >= BEGIN_BINARYSYMEXPRS && k <= END_BINARYSYMEXPRS;
331 /// Represents a symbolic expression like 'x' + 3.
332 class SymIntExpr : public BinarySymExpr {
// The integer operand is held by reference, not copied, so the referenced
// APSInt must outlive this expression.
334 const llvm::APSInt& RHS;
// Stores the symbolic left operand and binds RHS to `rhs`. The opcode and
// type are passed to the BinarySymExpr base.
337 SymIntExpr(const SymExpr *lhs, BinaryOperator::Opcode op,
338 const llvm::APSInt &rhs, QualType t)
339 : BinarySymExpr(SymIntExprKind, op, t), LHS(lhs), RHS(rhs) {
343 void dumpToStream(raw_ostream &os) const override;
345 const SymExpr *getLHS() const { return LHS; }
346 const llvm::APSInt &getRHS() const { return RHS; }
// Sets Complexity to one more than the left operand's complexity. Any guard
// around this assignment and the return statement are not visible here.
348 unsigned computeComplexity() const override {
350 Complexity = 1 + LHS->computeComplexity();
// Folding-set profile. Only the line adding the symbol kind is visible in
// this view; the remaining statements are elided.
354 static void Profile(llvm::FoldingSetNodeID& ID, const SymExpr *lhs,
355 BinaryOperator::Opcode op, const llvm::APSInt& rhs,
357 ID.AddInteger((unsigned) SymIntExprKind);
// Profiles this instance by forwarding its own fields to the static overload.
364 void Profile(llvm::FoldingSetNodeID& ID) override {
365 Profile(ID, LHS, getOpcode(), RHS, getType());
368 // Implement isa<T> support.
369 static bool classof(const SymExpr *SE) {
370 return SE->getKind() == SymIntExprKind;
374 /// Represents a symbolic expression like 3 - 'x'.
375 class IntSymExpr : public BinarySymExpr {
// The integer operand is held by reference, not copied, so the referenced
// APSInt must outlive this expression.
376 const llvm::APSInt& LHS;
// Binds LHS to `lhs` and stores the symbolic right operand. The opcode and
// type are passed to the BinarySymExpr base.
380 IntSymExpr(const llvm::APSInt &lhs, BinaryOperator::Opcode op,
381 const SymExpr *rhs, QualType t)
382 : BinarySymExpr(IntSymExprKind, op, t), LHS(lhs), RHS(rhs) {
386 void dumpToStream(raw_ostream &os) const override;
388 const SymExpr *getRHS() const { return RHS; }
389 const llvm::APSInt &getLHS() const { return LHS; }
// Sets Complexity to one more than the right operand's complexity. Any guard
// around this assignment and the return statement are not visible here.
391 unsigned computeComplexity() const override {
393 Complexity = 1 + RHS->computeComplexity();
// Folding-set profile. Only the line adding the symbol kind is visible in
// this view; the remaining statements are elided.
397 static void Profile(llvm::FoldingSetNodeID& ID, const llvm::APSInt& lhs,
398 BinaryOperator::Opcode op, const SymExpr *rhs,
400 ID.AddInteger((unsigned) IntSymExprKind);
// Profiles this instance by forwarding its own fields to the static overload.
407 void Profile(llvm::FoldingSetNodeID& ID) override {
408 Profile(ID, LHS, getOpcode(), RHS, getType());
411 // Implement isa<T> support.
412 static bool classof(const SymExpr *SE) {
413 return SE->getKind() == IntSymExprKind;
417 /// Represents a symbolic expression like 'x' + 'y'.
418 class SymSymExpr : public BinarySymExpr {
// Stores both symbolic operands. The opcode and type are passed to the
// BinarySymExpr base. The LHS/RHS member declarations are not visible here.
423 SymSymExpr(const SymExpr *lhs, BinaryOperator::Opcode op, const SymExpr *rhs,
425 : BinarySymExpr(SymSymExprKind, op, t), LHS(lhs), RHS(rhs) {
430 const SymExpr *getLHS() const { return LHS; }
431 const SymExpr *getRHS() const { return RHS; }
433 void dumpToStream(raw_ostream &os) const override;
// Sets Complexity to the sum of both operands' complexities. Unlike
// SymIntExpr and IntSymExpr in this file, no extra `1 +` is added for this
// node. NOTE(review): confirm whether that difference is intentional.
435 unsigned computeComplexity() const override {
437 Complexity = RHS->computeComplexity() + LHS->computeComplexity();
// Folding-set profile. Only the line adding the symbol kind is visible in
// this view; the remaining statements are elided.
441 static void Profile(llvm::FoldingSetNodeID& ID, const SymExpr *lhs,
442 BinaryOperator::Opcode op, const SymExpr *rhs, QualType t) {
443 ID.AddInteger((unsigned) SymSymExprKind);
// Profiles this instance by forwarding its own fields to the static overload.
450 void Profile(llvm::FoldingSetNodeID& ID) override {
451 Profile(ID, LHS, getOpcode(), RHS, getType());
454 // Implement isa<T> support.
455 static bool classof(const SymExpr *SE) {
456 return SE->getKind() == SymSymExprKind;
// Factory and registry for symbolic expressions. The get*/conjure* methods
// below return pointers to the symbol and expression classes declared earlier
// in this header.
460 class SymbolManager {
// Folding set of SymExpr nodes. NOTE(review): presumably used to unique
// symbols via their Profile() methods; the member using it is not visible
// here.
461 using DataSetTy = llvm::FoldingSet<SymExpr>;
// Maps a symbol to a pointer to a vector of symbols associated with it.
462 using SymbolDependTy = llvm::DenseMap<SymbolRef, SymbolRefSmallVectorTy *>;
466 /// Stores the extra dependencies between symbols: the data should be kept
467 /// alive as long as the key is live.
468 SymbolDependTy SymbolDependencies;
// Starts at 0. NOTE(review): presumably the source of fresh SymbolID values;
// the code that reads or increments it is not in this header view.
470 unsigned SymbolCounter = 0;
// Allocator and value factory are held by reference; both are supplied by the
// constructor's caller and are not owned by this class.
471 llvm::BumpPtrAllocator& BPAlloc;
472 BasicValueFactory &BV;
// Binds the context, value factory and allocator references, and constructs
// the dependency map with the argument 16.
476 SymbolManager(ASTContext &ctx, BasicValueFactory &bv,
477 llvm::BumpPtrAllocator& bpalloc)
478 : SymbolDependencies(16), BPAlloc(bpalloc), BV(bv), Ctx(ctx) {}
481 static bool canSymbolicate(QualType T);
483 /// Make a unique symbol for MemRegion R according to its kind.
484 const SymbolRegionValue* getRegionValueSymbol(const TypedValueRegion* R);
// Conjures a symbol for statement E in LCtx. Some parameters of this overload
// are on lines not visible in this view.
486 const SymbolConjured* conjureSymbol(const Stmt *E,
487 const LocationContext *LCtx,
490 const void *SymbolTag = nullptr);
// Convenience overload for expressions: forwards to the five-argument
// conjureSymbol, supplying E->getType() as the type.
492 const SymbolConjured* conjureSymbol(const Expr *E,
493 const LocationContext *LCtx,
495 const void *SymbolTag = nullptr) {
496 return conjureSymbol(E, LCtx, E->getType(), VisitCount, SymbolTag);
499 const SymbolDerived *getDerivedSymbol(SymbolRef parentSymbol,
500 const TypedValueRegion *R);
502 const SymbolExtent *getExtentSymbol(const SubRegion *R);
504 /// Creates a metadata symbol associated with a specific region.
506 /// VisitCount can be used to differentiate regions corresponding to
507 /// different loop iterations, thus, making the symbol path-dependent.
508 const SymbolMetadata *getMetadataSymbol(const MemRegion *R, const Stmt *S,
510 const LocationContext *LCtx,
512 const void *SymbolTag = nullptr);
514 const SymbolCast* getCastSymbol(const SymExpr *Operand,
515 QualType From, QualType To);
517 const SymIntExpr *getSymIntExpr(const SymExpr *lhs, BinaryOperator::Opcode op,
518 const llvm::APSInt& rhs, QualType t);
// Reference-taking convenience overload: forwards to the pointer overload
// with the address of `lhs`.
520 const SymIntExpr *getSymIntExpr(const SymExpr &lhs, BinaryOperator::Opcode op,
521 const llvm::APSInt& rhs, QualType t) {
522 return getSymIntExpr(&lhs, op, rhs, t);
525 const IntSymExpr *getIntSymExpr(const llvm::APSInt& lhs,
526 BinaryOperator::Opcode op,
527 const SymExpr *rhs, QualType t);
529 const SymSymExpr *getSymSymExpr(const SymExpr *lhs, BinaryOperator::Opcode op,
530 const SymExpr *rhs, QualType t);
// Returns SE->getType(); a thin wrapper over the expression's own accessor.
532 QualType getType(const SymExpr *SE) const {
533 return SE->getType();
536 /// Add artificial symbol dependency.
538 /// The dependent symbol should stay alive as long as the primary is alive.
539 void addSymbolDependency(const SymbolRef Primary, const SymbolRef Dependent);
// Looks up the symbols recorded as depending on Primary. NOTE(review): the
// result is a pointer and may be null when there are none; confirm against
// the implementation.
541 const SymbolRefSmallVectorTy *getDependentSymbols(const SymbolRef Primary);
543 ASTContext &getContext() { return Ctx; }
544 BasicValueFactory &getBasicVals() { return BV; }
547 /// A class responsible for cleaning up unused symbols.
// NOTE(review): the class header line and the SymbolStatus declaration used
// below are not visible in this view.
554 using SymbolSetTy = llvm::DenseSet<SymbolRef>;
555 using SymbolMapTy = llvm::DenseMap<SymbolRef, SymbolStatus>;
556 using RegionSetTy = llvm::DenseSet<const MemRegion *>;
// Per-symbol status map.
558 SymbolMapTy TheLiving;
// Set of symbols. NOTE(review): presumably filled by markInUse(); confirm
// against the implementation.
559 SymbolSetTy MetadataInUse;
// Set of regions iterated by region_begin()/region_end().
561 RegionSetTy RegionRoots;
// Stack frame supplied at construction; returned by getLocationContext().
563 const StackFrameContext *LCtx;
565 SymbolManager& SymMgr;
// Replaced through setReapedStore().
566 StoreRef reapedStore;
// Map from region to an unsigned value. NOTE(review): looks like a
// memoisation cache for a per-region query; confirm against the
// implementation.
567 llvm::DenseMap<const MemRegion *, unsigned> includedRegionCache;
570 /// Construct a reaper object, which removes everything which is not
571 /// live before we execute statement s in the given location context.
573 /// If the statement is NULL, everything in this and parent contexts is
575 /// If the stack frame context is NULL, everything on stack is considered
577 SymbolReaper(const StackFrameContext *Ctx, const Stmt *s,
578 SymbolManager &symmgr, StoreManager &storeMgr)
579 : LCtx(Ctx), Loc(s), SymMgr(symmgr), reapedStore(nullptr, storeMgr) {}
// Returns the stack frame given at construction, as a LocationContext.
581 const LocationContext *getLocationContext() const { return LCtx; }
583 bool isLive(SymbolRef sym);
584 bool isLiveRegion(const MemRegion *region);
585 bool isLive(const Stmt *ExprVal, const LocationContext *LCtx) const;
586 bool isLive(const VarRegion *VR, bool includeStoreBindings = false) const;
588 /// Unconditionally marks a symbol as live.
590 /// This should never be
591 /// used by checkers, only by the state infrastructure such as the store and
592 /// environment. Checkers should instead use metadata symbols and markInUse.
593 void markLive(SymbolRef sym);
595 /// Marks a symbol as important to a checker.
597 /// For metadata symbols,
598 /// this will keep the symbol alive as long as its associated region is also
599 /// live. For other symbols, this has no effect; checkers are not permitted
600 /// to influence the life of other symbols. This should be used before any
601 /// symbol marking has occurred, i.e. in the MarkLiveSymbols callback.
602 void markInUse(SymbolRef sym);
// Read-only iteration over the RegionRoots set.
604 using region_iterator = RegionSetTy::const_iterator;
606 region_iterator region_begin() const { return RegionRoots.begin(); }
607 region_iterator region_end() const { return RegionRoots.end(); }
609 /// Returns whether or not a symbol has been confirmed dead.
611 /// This should only be called once all marking of dead symbols has completed.
612 /// (For checkers, this means only in the checkDeadSymbols callback.)
613 bool isDead(SymbolRef sym) {
617 void markLive(const MemRegion *region);
618 void markElementIndicesLive(const MemRegion *region);
620 /// Set to the value of the symbolic store after
621 /// StoreManager::removeDeadBindings has been called.
622 void setReapedStore(StoreRef st) { reapedStore = st; }
625 /// Mark the symbols dependent on the input symbol as live.
626 void markDependentsLive(SymbolRef sym);
// Callback interface with one pure-virtual hook for symbols and one
// overridable hook for memory regions.
629 class SymbolVisitor {
// Non-virtual, defaulted destructor. NOTE(review): the access specifier in
// effect here is not visible in this view.
631 ~SymbolVisitor() = default;
634 SymbolVisitor() = default;
635 SymbolVisitor(const SymbolVisitor &) = default;
// User-provided move constructor with an empty body. NOTE(review): no data
// members are visible in this view, so this could presumably be `= default`;
// confirm before changing.
636 SymbolVisitor(SymbolVisitor &&) {}
638 /// A visitor method invoked by ProgramStateManager::scanReachableSymbols.
640 /// The method returns \c true if symbols should continue to be scanned and \c
642 virtual bool VisitSymbol(SymbolRef sym) = 0;
// Default implementation returns true; subclasses may override.
643 virtual bool VisitMemRegion(const MemRegion *) { return true; }
650 #endif // LLVM_CLANG_STATICANALYZER_CORE_PATHSENSITIVE_SYMBOLMANAGER_H