From 8ac513b1f101fa3b1332036b4c9131dc3b88c49a Mon Sep 17 00:00:00 2001 From: Artem Dergachev Date: Fri, 29 Mar 2019 22:49:30 +0000 Subject: [PATCH] [analyzer] Move taint API from ProgramState to a separate header. NFC. It is now an inter-checker communication API, similar to the one that connects MallocChecker/CStringChecker/InnerPointerChecker: simply a set of setters and getters for a state trait. Differential Revision: https://reviews.llvm.org/D59861 git-svn-id: https://llvm.org/svn/llvm-project/cfe/trunk@357326 91177308-0d34-0410-b5e6-96231b3b80d8 --- .../Core/BugReporter/BugReporterVisitors.h | 14 -- .../Core/PathSensitive/ProgramState.h | 37 --- .../Core/PathSensitive/TaintManager.h | 58 ----- .../Core/PathSensitive/TaintTag.h | 29 --- .../Checkers/ArrayBoundCheckerV2.cpp | 4 +- lib/StaticAnalyzer/Checkers/CMakeLists.txt | 1 + .../Checkers/DivZeroChecker.cpp | 6 +- .../Checkers/GenericTaintChecker.cpp | 23 +- lib/StaticAnalyzer/Checkers/Taint.cpp | 227 ++++++++++++++++++ lib/StaticAnalyzer/Checkers/Taint.h | 102 ++++++++ .../Checkers/TaintTesterChecker.cpp | 5 +- .../Checkers/VLASizeChecker.cpp | 4 +- .../Core/BugReporterVisitors.cpp | 20 -- lib/StaticAnalyzer/Core/CMakeLists.txt | 1 - lib/StaticAnalyzer/Core/ProgramState.cpp | 183 -------------- lib/StaticAnalyzer/Core/TaintManager.cpp | 22 -- test/Analysis/taint-dumps.c | 14 ++ 17 files changed, 375 insertions(+), 375 deletions(-) delete mode 100644 include/clang/StaticAnalyzer/Core/PathSensitive/TaintManager.h delete mode 100644 include/clang/StaticAnalyzer/Core/PathSensitive/TaintTag.h create mode 100644 lib/StaticAnalyzer/Checkers/Taint.cpp create mode 100644 lib/StaticAnalyzer/Checkers/Taint.h delete mode 100644 lib/StaticAnalyzer/Core/TaintManager.cpp create mode 100644 test/Analysis/taint-dumps.c diff --git a/include/clang/StaticAnalyzer/Core/BugReporter/BugReporterVisitors.h b/include/clang/StaticAnalyzer/Core/BugReporter/BugReporterVisitors.h index 8d6820fb8a..e624d0fc02 100644 --- a/include/clang/StaticAnalyzer/Core/BugReporter/BugReporterVisitors.h +++ b/include/clang/StaticAnalyzer/Core/BugReporter/BugReporterVisitors.h @@ -307,20 +307,6 @@ public: BugReport &BR) override; }; -/// The bug visitor prints a diagnostic message at the location where a given -/// variable was tainted. -class TaintBugVisitor final : public BugReporterVisitor { -private: - const SVal V; - -public: - TaintBugVisitor(const SVal V) : V(V) {} - void Profile(llvm::FoldingSetNodeID &ID) const override { ID.Add(V); } - - std::shared_ptr VisitNode(const ExplodedNode *N, - BugReporterContext &BRC, - BugReport &BR) override; -}; /// The bug visitor will walk all the nodes in a path and collect all the /// constraints. When it reaches the root node, will create a refutation diff --git a/include/clang/StaticAnalyzer/Core/PathSensitive/ProgramState.h b/include/clang/StaticAnalyzer/Core/PathSensitive/ProgramState.h index e5ff83eaea..2c6465d5fb 100644 --- a/include/clang/StaticAnalyzer/Core/PathSensitive/ProgramState.h +++ b/include/clang/StaticAnalyzer/Core/PathSensitive/ProgramState.h @@ -20,7 +20,6 @@ #include "clang/StaticAnalyzer/Core/PathSensitive/ProgramState_Fwd.h" #include "clang/StaticAnalyzer/Core/PathSensitive/SValBuilder.h" #include "clang/StaticAnalyzer/Core/PathSensitive/Store.h" -#include "clang/StaticAnalyzer/Core/PathSensitive/TaintTag.h" #include "llvm/ADT/FoldingSet.h" #include "llvm/ADT/ImmutableMap.h" #include "llvm/Support/Allocator.h" @@ -43,7 +42,6 @@ typedef std::unique_ptr(*ConstraintManagerCreator)( ProgramStateManager &, SubEngine *); typedef std::unique_ptr(*StoreManagerCreator)( ProgramStateManager &); -typedef llvm::ImmutableMap TaintedSubRegions; //===----------------------------------------------------------------------===// // ProgramStateTrait - Traits used by the Generic Data Map of a ProgramState. @@ -367,38 +365,6 @@ public: template CB scanReachableSymbols(llvm::iterator_range Reachable) const; - /// Create a new state in which the statement is marked as tainted. - LLVM_NODISCARD ProgramStateRef - addTaint(const Stmt *S, const LocationContext *LCtx, - TaintTagType Kind = TaintTagGeneric) const; - - /// Create a new state in which the value is marked as tainted. - LLVM_NODISCARD ProgramStateRef - addTaint(SVal V, TaintTagType Kind = TaintTagGeneric) const; - - /// Create a new state in which the symbol is marked as tainted. - LLVM_NODISCARD ProgramStateRef addTaint(SymbolRef S, - TaintTagType Kind = TaintTagGeneric) const; - - /// Create a new state in which the region symbol is marked as tainted. - LLVM_NODISCARD ProgramStateRef - addTaint(const MemRegion *R, TaintTagType Kind = TaintTagGeneric) const; - - /// Create a new state in a which a sub-region of a given symbol is tainted. - /// This might be necessary when referring to regions that can not have an - /// individual symbol, e.g. if they are represented by the default binding of - /// a LazyCompoundVal. - LLVM_NODISCARD ProgramStateRef - addPartialTaint(SymbolRef ParentSym, const SubRegion *SubRegion, - TaintTagType Kind = TaintTagGeneric) const; - - /// Check if the statement is tainted in the current state. - bool isTainted(const Stmt *S, const LocationContext *LCtx, - TaintTagType Kind = TaintTagGeneric) const; - bool isTainted(SVal V, TaintTagType Kind = TaintTagGeneric) const; - bool isTainted(SymbolRef Sym, TaintTagType Kind = TaintTagGeneric) const; - bool isTainted(const MemRegion *Reg, TaintTagType Kind=TaintTagGeneric) const; - //==---------------------------------------------------------------------==// // Accessing the Generic Data Map (GDM). //==---------------------------------------------------------------------==// @@ -462,10 +428,8 @@ public: const LocationContext *CurrentLC = nullptr) const; void printDOT(raw_ostream &Out, const LocationContext *CurrentLC = nullptr) const; - void printTaint(raw_ostream &Out, const char *nl = "\n") const; void dump() const; - void dumpTaint() const; private: friend void ProgramStateRetain(const ProgramState *state); @@ -499,7 +463,6 @@ private: std::unique_ptr ConstraintMgr; ProgramState::GenericDataMap::Factory GDMFactory; - TaintedSubRegions::Factory TSRFactory; typedef llvm::DenseMap > GDMContextsTy; GDMContextsTy GDMContexts; diff --git a/include/clang/StaticAnalyzer/Core/PathSensitive/TaintManager.h b/include/clang/StaticAnalyzer/Core/PathSensitive/TaintManager.h deleted file mode 100644 index 9424392ce7..0000000000 --- a/include/clang/StaticAnalyzer/Core/PathSensitive/TaintManager.h +++ /dev/null @@ -1,58 +0,0 @@ -//===- TaintManager.h - Managing taint --------------------------*- C++ -*-===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// -// -// This file provides APIs for adding, removing, querying symbol taint. -// -//===----------------------------------------------------------------------===// - -#ifndef LLVM_CLANG_STATICANALYZER_CORE_PATHSENSITIVE_TAINTMANAGER_H -#define LLVM_CLANG_STATICANALYZER_CORE_PATHSENSITIVE_TAINTMANAGER_H - -#include "clang/StaticAnalyzer/Core/PathSensitive/ProgramState.h" -#include "clang/StaticAnalyzer/Core/PathSensitive/ProgramStateTrait.h" -#include "clang/StaticAnalyzer/Core/PathSensitive/SymExpr.h" -#include "clang/StaticAnalyzer/Core/PathSensitive/TaintTag.h" -#include "llvm/ADT/ImmutableMap.h" - -namespace clang { -namespace ento { - -/// The GDM component containing the tainted root symbols. We lazily infer the -/// taint of the dependent symbols. Currently, this is a map from a symbol to -/// tag kind. TODO: Should support multiple tag kinds. -// FIXME: This does not use the nice trait macros because it must be accessible -// from multiple translation units. -struct TaintMap {}; - -using TaintMapImpl = llvm::ImmutableMap; - -template<> struct ProgramStateTrait - : public ProgramStatePartialTrait { - static void *GDMIndex(); -}; - -/// The GDM component mapping derived symbols' parent symbols to their -/// underlying regions. This is used to efficiently check whether a symbol is -/// tainted when it represents a sub-region of a tainted symbol. -struct DerivedSymTaint {}; - -using DerivedSymTaintImpl = llvm::ImmutableMap; - -template<> struct ProgramStateTrait - : public ProgramStatePartialTrait { - static void *GDMIndex(); -}; - -class TaintManager { - TaintManager() = default; -}; - -} // namespace ento -} // namespace clang - -#endif // LLVM_CLANG_STATICANALYZER_CORE_PATHSENSITIVE_TAINTMANAGER_H diff --git a/include/clang/StaticAnalyzer/Core/PathSensitive/TaintTag.h b/include/clang/StaticAnalyzer/Core/PathSensitive/TaintTag.h deleted file mode 100644 index d1018f9d1c..0000000000 --- a/include/clang/StaticAnalyzer/Core/PathSensitive/TaintTag.h +++ /dev/null @@ -1,29 +0,0 @@ -//===- TaintTag.h - Path-sensitive "State" for tracking values --*- C++ -*-===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// -// -// Defines a set of taint tags. Several tags are used to differentiate kinds -// of taint. -// -//===----------------------------------------------------------------------===// - -#ifndef LLVM_CLANG_STATICANALYZER_CORE_PATHSENSITIVE_TAINTTAG_H -#define LLVM_CLANG_STATICANALYZER_CORE_PATHSENSITIVE_TAINTTAG_H - -namespace clang { -namespace ento { - -/// The type of taint, which helps to differentiate between different types of -/// taint. -using TaintTagType = unsigned; - -static const TaintTagType TaintTagGeneric = 0; - -} // namespace ento -} // namespace clang - -#endif // LLVM_CLANG_STATICANALYZER_CORE_PATHSENSITIVE_TAINTTAG_H diff --git a/lib/StaticAnalyzer/Checkers/ArrayBoundCheckerV2.cpp b/lib/StaticAnalyzer/Checkers/ArrayBoundCheckerV2.cpp index 8c0b8a1916..3bf8a1836b 100644 --- a/lib/StaticAnalyzer/Checkers/ArrayBoundCheckerV2.cpp +++ b/lib/StaticAnalyzer/Checkers/ArrayBoundCheckerV2.cpp @@ -11,6 +11,7 @@ // //===----------------------------------------------------------------------===// +#include "Taint.h" #include "clang/StaticAnalyzer/Checkers/BuiltinCheckerRegistration.h" #include "clang/AST/CharUnits.h" #include "clang/StaticAnalyzer/Core/BugReporter/BugType.h" @@ -24,6 +25,7 @@ using namespace clang; using namespace ento; +using namespace taint; namespace { class ArrayBoundCheckerV2 : @@ -204,7 +206,7 @@ void ArrayBoundCheckerV2::checkLocation(SVal location, bool isLoad, // If we are under constrained and the index variables are tainted, report. if (state_exceedsUpperBound && state_withinUpperBound) { SVal ByteOffset = rawOffset.getByteOffset(); - if (state->isTainted(ByteOffset)) { + if (isTainted(state, ByteOffset)) { reportOOB(checkerContext, state_exceedsUpperBound, OOB_Tainted, llvm::make_unique(ByteOffset)); return; diff --git a/lib/StaticAnalyzer/Checkers/CMakeLists.txt b/lib/StaticAnalyzer/Checkers/CMakeLists.txt index 0a1b7745bf..6c11246078 100644 --- a/lib/StaticAnalyzer/Checkers/CMakeLists.txt +++ b/lib/StaticAnalyzer/Checkers/CMakeLists.txt @@ -87,6 +87,7 @@ add_clang_library(clangStaticAnalyzerCheckers StackAddrEscapeChecker.cpp StdLibraryFunctionsChecker.cpp StreamChecker.cpp + Taint.cpp TaintTesterChecker.cpp TestAfterDivZeroChecker.cpp TraversalChecker.cpp diff --git a/lib/StaticAnalyzer/Checkers/DivZeroChecker.cpp b/lib/StaticAnalyzer/Checkers/DivZeroChecker.cpp index 4830574ca8..33e8fcd8af 100644 --- a/lib/StaticAnalyzer/Checkers/DivZeroChecker.cpp +++ b/lib/StaticAnalyzer/Checkers/DivZeroChecker.cpp @@ -11,6 +11,7 @@ // //===----------------------------------------------------------------------===// +#include "Taint.h" #include "clang/StaticAnalyzer/Checkers/BuiltinCheckerRegistration.h" #include "clang/StaticAnalyzer/Core/BugReporter/BugType.h" #include "clang/StaticAnalyzer/Core/Checker.h" @@ -19,6 +20,7 @@ using namespace clang; using namespace ento; +using namespace taint; namespace { class DivZeroChecker : public Checker< check::PreStmt > { @@ -83,10 +85,10 @@ void DivZeroChecker::checkPreStmt(const BinaryOperator *B, return; } - bool TaintedD = C.getState()->isTainted(*DV); + bool TaintedD = isTainted(C.getState(), *DV); if ((stateNotZero && stateZero && TaintedD)) { reportBug("Division by a tainted value, possibly zero", stateZero, C, - llvm::make_unique(*DV)); + llvm::make_unique(*DV)); return; } diff --git a/lib/StaticAnalyzer/Checkers/GenericTaintChecker.cpp b/lib/StaticAnalyzer/Checkers/GenericTaintChecker.cpp index b195842f13..d3ab980332 100644 --- a/lib/StaticAnalyzer/Checkers/GenericTaintChecker.cpp +++ b/lib/StaticAnalyzer/Checkers/GenericTaintChecker.cpp @@ -13,6 +13,8 @@ // aggressively, even if the involved symbols are under constrained. // //===----------------------------------------------------------------------===// + +#include "Taint.h" #include "clang/StaticAnalyzer/Checkers/BuiltinCheckerRegistration.h" #include "clang/AST/Attr.h" #include "clang/Basic/Builtins.h" @@ -27,6 +29,7 @@ using namespace clang; using namespace ento; +using namespace taint; namespace { class GenericTaintChecker @@ -41,6 +44,9 @@ public: void checkPreStmt(const CallExpr *CE, CheckerContext &C) const; + void printState(raw_ostream &Out, ProgramStateRef State, + const char *NL, const char *Sep) const override; + private: static const unsigned InvalidArgIndex = UINT_MAX; /// Denotes the return vale. @@ -152,14 +158,14 @@ private: static bool isTaintedOrPointsToTainted(const Expr *E, ProgramStateRef State, CheckerContext &C) { - if (State->isTainted(E, C.getLocationContext()) || isStdin(E, C)) + if (isTainted(State, E, C.getLocationContext()) || isStdin(E, C)) return true; if (!E->getType().getTypePtr()->isPointerType()) return false; Optional V = getPointedToSVal(C, E); - return (V && State->isTainted(*V)); + return (V && isTainted(State, *V)); } /// Pre-process a function which propagates taint according to the @@ -313,6 +319,11 @@ void GenericTaintChecker::checkPostStmt(const CallExpr *CE, propagateFromPre(CE, C); } +void GenericTaintChecker::printState(raw_ostream &Out, ProgramStateRef State, + const char *NL, const char *Sep) const { + printTaint(State, Out, NL, Sep); +} + void GenericTaintChecker::addSourcesPre(const CallExpr *CE, CheckerContext &C) const { ProgramStateRef State = nullptr; @@ -354,7 +365,7 @@ bool GenericTaintChecker::propagateFromPre(const CallExpr *CE, for (unsigned ArgNum : TaintArgs) { // Special handling for the tainted return value. if (ArgNum == ReturnValueIndex) { - State = State->addTaint(CE, C.getLocationContext()); + State = addTaint(State, CE, C.getLocationContext()); continue; } @@ -365,7 +376,7 @@ bool GenericTaintChecker::propagateFromPre(const CallExpr *CE, const Expr *Arg = CE->getArg(ArgNum); Optional V = getPointedToSVal(C, Arg); if (V) - State = State->addTaint(*V); + State = addTaint(State, *V); } // Clear up the taint info from the state. @@ -570,9 +581,9 @@ bool GenericTaintChecker::generateReportIfTainted(const Expr *E, ProgramStateRef State = C.getState(); Optional PointedToSVal = getPointedToSVal(C, E); SVal TaintedSVal; - if (PointedToSVal && State->isTainted(*PointedToSVal)) + if (PointedToSVal && isTainted(State, *PointedToSVal)) TaintedSVal = *PointedToSVal; - else if (State->isTainted(E, C.getLocationContext())) + else if (isTainted(State, E, C.getLocationContext())) TaintedSVal = C.getSVal(E); else return false; diff --git a/lib/StaticAnalyzer/Checkers/Taint.cpp b/lib/StaticAnalyzer/Checkers/Taint.cpp new file mode 100644 index 0000000000..80235953c5 --- /dev/null +++ b/lib/StaticAnalyzer/Checkers/Taint.cpp @@ -0,0 +1,227 @@ +//=== Taint.cpp - Taint tracking and basic propagation rules. ------*- C++ -*-// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// Defines basic, non-domain-specific mechanisms for tracking tainted values. +// +//===----------------------------------------------------------------------===// + +#include "Taint.h" +#include "clang/StaticAnalyzer/Core/BugReporter/BugReporter.h" +#include "clang/StaticAnalyzer/Core/PathSensitive/ProgramStateTrait.h" + +using namespace clang; +using namespace ento; +using namespace taint; + +// Fully tainted symbols. +REGISTER_MAP_WITH_PROGRAMSTATE(TaintMap, SymbolRef, TaintTagType) + +// Partially tainted symbols. +REGISTER_MAP_FACTORY_WITH_PROGRAMSTATE(TaintedSubRegions, const SubRegion *, + TaintTagType); +REGISTER_MAP_WITH_PROGRAMSTATE(DerivedSymTaint, SymbolRef, TaintedSubRegions) + +void taint::printTaint(ProgramStateRef State, raw_ostream &Out, const char *NL, + const char *Sep) { + TaintMapTy TM = State->get(); + + if (!TM.isEmpty()) + Out << "Tainted symbols:" << NL; + + for (const auto &I : TM) + Out << I.first << " : " << I.second << NL; +} + +void dumpTaint(ProgramStateRef State) { + printTaint(State, llvm::errs()); +} + +ProgramStateRef taint::addTaint(ProgramStateRef State, const Stmt *S, + const LocationContext *LCtx, + TaintTagType Kind) { + return addTaint(State, State->getSVal(S, LCtx), Kind); +} + +ProgramStateRef taint::addTaint(ProgramStateRef State, SVal V, + TaintTagType Kind) { + SymbolRef Sym = V.getAsSymbol(); + if (Sym) + return addTaint(State, Sym, Kind); + + // If the SVal represents a structure, try to mass-taint all values within the + // structure. For now it only works efficiently on lazy compound values that + // were conjured during a conservative evaluation of a function - either as + // return values of functions that return structures or arrays by value, or as + // values of structures or arrays passed into the function by reference, + // directly or through pointer aliasing. Such lazy compound values are + // characterized by having exactly one binding in their captured store within + // their parent region, which is a conjured symbol default-bound to the base + // region of the parent region. + if (auto LCV = V.getAs()) { + if (Optional binding = + State->getStateManager().getStoreManager() + .getDefaultBinding(*LCV)) { + if (SymbolRef Sym = binding->getAsSymbol()) + return addPartialTaint(State, Sym, LCV->getRegion(), Kind); + } + } + + const MemRegion *R = V.getAsRegion(); + return addTaint(State, R, Kind); +} + +ProgramStateRef taint::addTaint(ProgramStateRef State, const MemRegion *R, + TaintTagType Kind) { + if (const SymbolicRegion *SR = dyn_cast_or_null(R)) + return addTaint(State, SR->getSymbol(), Kind); + return State; +} + +ProgramStateRef taint::addTaint(ProgramStateRef State, SymbolRef Sym, + TaintTagType Kind) { + // If this is a symbol cast, remove the cast before adding the taint. Taint + // is cast agnostic. + while (const SymbolCast *SC = dyn_cast(Sym)) + Sym = SC->getOperand(); + + ProgramStateRef NewState = State->set(Sym, Kind); + assert(NewState); + return NewState; +} + +ProgramStateRef taint::addPartialTaint(ProgramStateRef State, + SymbolRef ParentSym, + const SubRegion *SubRegion, + TaintTagType Kind) { + // Ignore partial taint if the entire parent symbol is already tainted. + if (const TaintTagType *T = State->get(ParentSym)) + if (*T == Kind) + return State; + + // Partial taint applies if only a portion of the symbol is tainted. + if (SubRegion == SubRegion->getBaseRegion()) + return addTaint(State, ParentSym, Kind); + + const TaintedSubRegions *SavedRegs = State->get(ParentSym); + TaintedSubRegions::Factory &F = State->get_context(); + TaintedSubRegions Regs = SavedRegs ? *SavedRegs : F.getEmptyMap(); + + Regs = F.add(Regs, SubRegion, Kind); + ProgramStateRef NewState = State->set(ParentSym, Regs); + assert(NewState); + return NewState; +} + +bool taint::isTainted(ProgramStateRef State, const Stmt *S, + const LocationContext *LCtx, TaintTagType Kind) { + SVal val = State->getSVal(S, LCtx); + return isTainted(State, val, Kind); +} + +bool taint::isTainted(ProgramStateRef State, SVal V, TaintTagType Kind) { + if (const SymExpr *Sym = V.getAsSymExpr()) + return isTainted(State, Sym, Kind); + if (const MemRegion *Reg = V.getAsRegion()) + return isTainted(State, Reg, Kind); + return false; +} + +bool taint::isTainted(ProgramStateRef State, const MemRegion *Reg, + TaintTagType K) { + if (!Reg) + return false; + + // Element region (array element) is tainted if either the base or the offset + // are tainted. + if (const ElementRegion *ER = dyn_cast(Reg)) + return isTainted(State, ER->getSuperRegion(), K) || + isTainted(State, ER->getIndex(), K); + + if (const SymbolicRegion *SR = dyn_cast(Reg)) + return isTainted(State, SR->getSymbol(), K); + + if (const SubRegion *ER = dyn_cast(Reg)) + return isTainted(State, ER->getSuperRegion(), K); + + return false; +} + +bool taint::isTainted(ProgramStateRef State, SymbolRef Sym, TaintTagType Kind) { + if (!Sym) + return false; + + // Traverse all the symbols this symbol depends on to see if any are tainted. + for (SymExpr::symbol_iterator SI = Sym->symbol_begin(), + SE = Sym->symbol_end(); SI != SE; ++SI) { + if (!isa(*SI)) + continue; + + if (const TaintTagType *Tag = State->get(*SI)) { + if (*Tag == Kind) + return true; + } + + if (const auto *SD = dyn_cast(*SI)) { + // If this is a SymbolDerived with a tainted parent, it's also tainted. + if (isTainted(State, SD->getParentSymbol(), Kind)) + return true; + + // If this is a SymbolDerived with the same parent symbol as another + // tainted SymbolDerived and a region that's a sub-region of that tainted + // symbol, it's also tainted. + if (const TaintedSubRegions *Regs = + State->get(SD->getParentSymbol())) { + const TypedValueRegion *R = SD->getRegion(); + for (auto I : *Regs) { + // FIXME: The logic to identify tainted regions could be more + // complete. For example, this would not currently identify + // overlapping fields in a union as tainted. To identify this we can + // check for overlapping/nested byte offsets. + if (Kind == I.second && R->isSubRegionOf(I.first)) + return true; + } + } + } + + // If memory region is tainted, data is also tainted. + if (const auto *SRV = dyn_cast(*SI)) { + if (isTainted(State, SRV->getRegion(), Kind)) + return true; + } + + // If this is a SymbolCast from a tainted value, it's also tainted. + if (const auto *SC = dyn_cast(*SI)) { + if (isTainted(State, SC->getOperand(), Kind)) + return true; + } + } + + return false; +} + +std::shared_ptr +TaintBugVisitor::VisitNode(const ExplodedNode *N, BugReporterContext &BRC, + BugReport &BR) { + + // Find the ExplodedNode where the taint was first introduced + if (!isTainted(N->getState(), V) || + isTainted(N->getFirstPred()->getState(), V)) + return nullptr; + + const Stmt *S = PathDiagnosticLocation::getStmt(N); + if (!S) + return nullptr; + + const LocationContext *NCtx = N->getLocationContext(); + PathDiagnosticLocation L = + PathDiagnosticLocation::createBegin(S, BRC.getSourceManager(), NCtx); + if (!L.isValid() || !L.asLocation().isValid()) + return nullptr; + + return std::make_shared(L, "Taint originated here"); +} diff --git a/lib/StaticAnalyzer/Checkers/Taint.h b/lib/StaticAnalyzer/Checkers/Taint.h new file mode 100644 index 0000000000..72cf6a79d5 --- /dev/null +++ b/lib/StaticAnalyzer/Checkers/Taint.h @@ -0,0 +1,102 @@ +//=== Taint.h - Taint tracking and basic propagation rules. --------*- C++ -*-// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// Defines basic, non-domain-specific mechanisms for tracking tainted values. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_CLANG_LIB_STATICANALYZER_CHECKERS_TAINT_H +#define LLVM_CLANG_LIB_STATICANALYZER_CHECKERS_TAINT_H + +#include "clang/StaticAnalyzer/Core/BugReporter/BugReporterVisitors.h" +#include "clang/StaticAnalyzer/Core/PathSensitive/ProgramState.h" + +namespace clang { +namespace ento { +namespace taint { + +/// The type of taint, which helps to differentiate between different types of +/// taint. +using TaintTagType = unsigned; + +static constexpr TaintTagType TaintTagGeneric = 0; + +/// Create a new state in which the value of the statement is marked as tainted. +LLVM_NODISCARD ProgramStateRef +addTaint(ProgramStateRef State, const Stmt *S, const LocationContext *LCtx, + TaintTagType Kind = TaintTagGeneric); + +/// Create a new state in which the value is marked as tainted. +LLVM_NODISCARD ProgramStateRef +addTaint(ProgramStateRef State, SVal V, + TaintTagType Kind = TaintTagGeneric); + +/// Create a new state in which the symbol is marked as tainted. +LLVM_NODISCARD ProgramStateRef +addTaint(ProgramStateRef State, SymbolRef Sym, + TaintTagType Kind = TaintTagGeneric); + +/// Create a new state in which the pointer represented by the region +/// is marked as tainted. +LLVM_NODISCARD ProgramStateRef +addTaint(ProgramStateRef State, const MemRegion *R, + TaintTagType Kind = TaintTagGeneric); + +/// Create a new state in a which a sub-region of a given symbol is tainted. +/// This might be necessary when referring to regions that can not have an +/// individual symbol, e.g. if they are represented by the default binding of +/// a LazyCompoundVal. +LLVM_NODISCARD ProgramStateRef +addPartialTaint(ProgramStateRef State, + SymbolRef ParentSym, const SubRegion *SubRegion, + TaintTagType Kind = TaintTagGeneric); + +/// Check if the statement has a tainted value in the given state. +bool isTainted(ProgramStateRef State, const Stmt *S, + const LocationContext *LCtx, + TaintTagType Kind = TaintTagGeneric); + +/// Check if the value is tainted in the given state. +bool isTainted(ProgramStateRef State, SVal V, + TaintTagType Kind = TaintTagGeneric); + +/// Check if the symbol is tainted in the given state. +bool isTainted(ProgramStateRef State, SymbolRef Sym, + TaintTagType Kind = TaintTagGeneric); + +/// Check if the pointer represented by the region is tainted in the given +/// state. +bool isTainted(ProgramStateRef State, const MemRegion *Reg, + TaintTagType Kind = TaintTagGeneric); + +void printTaint(ProgramStateRef State, raw_ostream &Out, const char *nl = "\n", + const char *sep = ""); + +LLVM_DUMP_METHOD void dumpTaint(ProgramStateRef State); + +/// The bug visitor prints a diagnostic message at the location where a given +/// variable was tainted. +class TaintBugVisitor final : public BugReporterVisitor { +private: + const SVal V; + +public: + TaintBugVisitor(const SVal V) : V(V) {} + void Profile(llvm::FoldingSetNodeID &ID) const override { ID.Add(V); } + + std::shared_ptr VisitNode(const ExplodedNode *N, + BugReporterContext &BRC, + BugReport &BR) override; +}; + +} // namespace taint +} // namespace ento +} // namespace clang + +#endif + diff --git a/lib/StaticAnalyzer/Checkers/TaintTesterChecker.cpp b/lib/StaticAnalyzer/Checkers/TaintTesterChecker.cpp index ba931f1688..094762e2fa 100644 --- a/lib/StaticAnalyzer/Checkers/TaintTesterChecker.cpp +++ b/lib/StaticAnalyzer/Checkers/TaintTesterChecker.cpp @@ -9,6 +9,8 @@ // This checker can be used for testing how taint data is propagated. // //===----------------------------------------------------------------------===// + +#include "Taint.h" #include "clang/StaticAnalyzer/Checkers/BuiltinCheckerRegistration.h" #include "clang/StaticAnalyzer/Core/BugReporter/BugType.h" #include "clang/StaticAnalyzer/Core/Checker.h" @@ -17,6 +19,7 @@ using namespace clang; using namespace ento; +using namespace taint; namespace { class TaintTesterChecker : public Checker< check::PostStmt > { @@ -46,7 +49,7 @@ void TaintTesterChecker::checkPostStmt(const Expr *E, if (!State) return; - if (State->isTainted(E, C.getLocationContext())) { + if (isTainted(State, E, C.getLocationContext())) { if (ExplodedNode *N = C.generateNonFatalErrorNode()) { initBugType(); auto report = llvm::make_unique(*BT, "tainted",N); diff --git a/lib/StaticAnalyzer/Checkers/VLASizeChecker.cpp b/lib/StaticAnalyzer/Checkers/VLASizeChecker.cpp index 04d77bc31e..1630896c3b 100644 --- a/lib/StaticAnalyzer/Checkers/VLASizeChecker.cpp +++ b/lib/StaticAnalyzer/Checkers/VLASizeChecker.cpp @@ -13,6 +13,7 @@ // //===----------------------------------------------------------------------===// +#include "Taint.h" #include "clang/StaticAnalyzer/Checkers/BuiltinCheckerRegistration.h" #include "clang/AST/CharUnits.h" #include "clang/StaticAnalyzer/Core/BugReporter/BugType.h" @@ -25,6 +26,7 @@ using namespace clang; using namespace ento; +using namespace taint; namespace { class VLASizeChecker : public Checker< check::PreStmt > { @@ -106,7 +108,7 @@ void VLASizeChecker::checkPreStmt(const DeclStmt *DS, CheckerContext &C) const { return; // Check if the size is tainted. - if (state->isTainted(sizeV)) { + if (isTainted(state, sizeV)) { reportBug(VLA_Tainted, SE, nullptr, C, llvm::make_unique(sizeV)); return; diff --git a/lib/StaticAnalyzer/Core/BugReporterVisitors.cpp b/lib/StaticAnalyzer/Core/BugReporterVisitors.cpp index 2aa5e26413..1bbc0dbc02 100644 --- a/lib/StaticAnalyzer/Core/BugReporterVisitors.cpp +++ b/lib/StaticAnalyzer/Core/BugReporterVisitors.cpp @@ -2412,26 +2412,6 @@ CXXSelfAssignmentBRVisitor::VisitNode(const ExplodedNode *Succ, return std::move(Piece); } -std::shared_ptr -TaintBugVisitor::VisitNode(const ExplodedNode *N, - BugReporterContext &BRC, BugReport &) { - - // Find the ExplodedNode where the taint was first introduced - if (!N->getState()->isTainted(V) || N->getFirstPred()->getState()->isTainted(V)) - return nullptr; - - const Stmt *S = PathDiagnosticLocation::getStmt(N); - if (!S) - return nullptr; - - const LocationContext *NCtx = N->getLocationContext(); - PathDiagnosticLocation L = - PathDiagnosticLocation::createBegin(S, BRC.getSourceManager(), NCtx); - if (!L.isValid() || !L.asLocation().isValid()) - return nullptr; - - return std::make_shared(L, "Taint originated here"); -} FalsePositiveRefutationBRVisitor::FalsePositiveRefutationBRVisitor() : Constraints(ConstraintRangeTy::Factory().getEmptyMap()) {} diff --git a/lib/StaticAnalyzer/Core/CMakeLists.txt b/lib/StaticAnalyzer/Core/CMakeLists.txt index 977e947933..942aedd388 100644 --- a/lib/StaticAnalyzer/Core/CMakeLists.txt +++ b/lib/StaticAnalyzer/Core/CMakeLists.txt @@ -45,7 +45,6 @@ add_clang_library(clangStaticAnalyzerCore SValBuilder.cpp SVals.cpp SymbolManager.cpp - TaintManager.cpp WorkList.cpp LINK_LIBS diff --git a/lib/StaticAnalyzer/Core/ProgramState.cpp b/lib/StaticAnalyzer/Core/ProgramState.cpp index b10a8b20a4..04ed507055 100644 --- a/lib/StaticAnalyzer/Core/ProgramState.cpp +++ b/lib/StaticAnalyzer/Core/ProgramState.cpp @@ -16,7 +16,6 @@ #include "clang/StaticAnalyzer/Core/PathSensitive/CallEvent.h" #include "clang/StaticAnalyzer/Core/PathSensitive/ProgramStateTrait.h" #include "clang/StaticAnalyzer/Core/PathSensitive/SubEngine.h" -#include "clang/StaticAnalyzer/Core/PathSensitive/TaintManager.h" #include "clang/StaticAnalyzer/Core/PathSensitive/DynamicTypeMap.h" #include "llvm/Support/raw_ostream.h" @@ -458,9 +457,6 @@ void ProgramState::print(raw_ostream &Out, // Print out the tracked dynamic types. printDynamicTypeInfo(this, Out, NL, Sep); - // Print out tainted symbols. - printTaint(Out, NL); - // Print checker-specific data. Mgr.getOwningEngine().printState(Out, this, NL, Sep, LC); } @@ -474,22 +470,6 @@ LLVM_DUMP_METHOD void ProgramState::dump() const { print(llvm::errs()); } -void ProgramState::printTaint(raw_ostream &Out, - const char *NL) const { - TaintMapImpl TM = get(); - - if (!TM.isEmpty()) - Out <<"Tainted symbols:" << NL; - - for (TaintMapImpl::iterator I = TM.begin(), E = TM.end(); I != E; ++I) { - Out << I->first << " : " << I->second << NL; - } -} - -void ProgramState::dumpTaint() const { - printTaint(llvm::errs()); -} - AnalysisManager& ProgramState::getAnalysisManager() const { return stateMgr->getOwningEngine().getAnalysisManager(); } @@ -657,166 +637,3 @@ bool ProgramState::scanReachableSymbols( } return true; } - -ProgramStateRef ProgramState::addTaint(const Stmt *S, - const LocationContext *LCtx, - TaintTagType Kind) const { - if (const Expr *E = dyn_cast_or_null(S)) - S = E->IgnoreParens(); - - return addTaint(getSVal(S, LCtx), Kind); -} - -ProgramStateRef ProgramState::addTaint(SVal V, - TaintTagType Kind) const { - SymbolRef Sym = V.getAsSymbol(); - if (Sym) - return addTaint(Sym, Kind); - - // If the SVal represents a structure, try to mass-taint all values within the - // structure. For now it only works efficiently on lazy compound values that - // were conjured during a conservative evaluation of a function - either as - // return values of functions that return structures or arrays by value, or as - // values of structures or arrays passed into the function by reference, - // directly or through pointer aliasing. Such lazy compound values are - // characterized by having exactly one binding in their captured store within - // their parent region, which is a conjured symbol default-bound to the base - // region of the parent region. - if (auto LCV = V.getAs()) { - if (Optional binding = getStateManager().StoreMgr->getDefaultBinding(*LCV)) { - if (SymbolRef Sym = binding->getAsSymbol()) - return addPartialTaint(Sym, LCV->getRegion(), Kind); - } - } - - const MemRegion *R = V.getAsRegion(); - return addTaint(R, Kind); -} - -ProgramStateRef ProgramState::addTaint(const MemRegion *R, - TaintTagType Kind) const { - if (const SymbolicRegion *SR = dyn_cast_or_null(R)) - return addTaint(SR->getSymbol(), Kind); - return this; -} - -ProgramStateRef ProgramState::addTaint(SymbolRef Sym, - TaintTagType Kind) const { - // If this is a symbol cast, remove the cast before adding the taint. Taint - // is cast agnostic. - while (const SymbolCast *SC = dyn_cast(Sym)) - Sym = SC->getOperand(); - - ProgramStateRef NewState = set(Sym, Kind); - assert(NewState); - return NewState; -} - -ProgramStateRef ProgramState::addPartialTaint(SymbolRef ParentSym, - const SubRegion *SubRegion, - TaintTagType Kind) const { - // Ignore partial taint if the entire parent symbol is already tainted. - if (contains(ParentSym) && *get(ParentSym) == Kind) - return this; - - // Partial taint applies if only a portion of the symbol is tainted. - if (SubRegion == SubRegion->getBaseRegion()) - return addTaint(ParentSym, Kind); - - const TaintedSubRegions *SavedRegs = get(ParentSym); - TaintedSubRegions Regs = - SavedRegs ? *SavedRegs : stateMgr->TSRFactory.getEmptyMap(); - - Regs = stateMgr->TSRFactory.add(Regs, SubRegion, Kind); - ProgramStateRef NewState = set(ParentSym, Regs); - assert(NewState); - return NewState; -} - -bool ProgramState::isTainted(const Stmt *S, const LocationContext *LCtx, - TaintTagType Kind) const { - if (const Expr *E = dyn_cast_or_null(S)) - S = E->IgnoreParens(); - - SVal val = getSVal(S, LCtx); - return isTainted(val, Kind); -} - -bool ProgramState::isTainted(SVal V, TaintTagType Kind) const { - if (const SymExpr *Sym = V.getAsSymExpr()) - return isTainted(Sym, Kind); - if (const MemRegion *Reg = V.getAsRegion()) - return isTainted(Reg, Kind); - return false; -} - -bool ProgramState::isTainted(const MemRegion *Reg, TaintTagType K) const { - if (!Reg) - return false; - - // Element region (array element) is tainted if either the base or the offset - // are tainted. - if (const ElementRegion *ER = dyn_cast(Reg)) - return isTainted(ER->getSuperRegion(), K) || isTainted(ER->getIndex(), K); - - if (const SymbolicRegion *SR = dyn_cast(Reg)) - return isTainted(SR->getSymbol(), K); - - if (const SubRegion *ER = dyn_cast(Reg)) - return isTainted(ER->getSuperRegion(), K); - - return false; -} - -bool ProgramState::isTainted(SymbolRef Sym, TaintTagType Kind) const { - if (!Sym) - return false; - - // Traverse all the symbols this symbol depends on to see if any are tainted. - for (SymExpr::symbol_iterator SI = Sym->symbol_begin(), SE =Sym->symbol_end(); - SI != SE; ++SI) { - if (!isa(*SI)) - continue; - - if (const TaintTagType *Tag = get(*SI)) { - if (*Tag == Kind) - return true; - } - - if (const SymbolDerived *SD = dyn_cast(*SI)) { - // If this is a SymbolDerived with a tainted parent, it's also tainted. - if (isTainted(SD->getParentSymbol(), Kind)) - return true; - - // If this is a SymbolDerived with the same parent symbol as another - // tainted SymbolDerived and a region that's a sub-region of that tainted - // symbol, it's also tainted. - if (const TaintedSubRegions *Regs = - get(SD->getParentSymbol())) { - const TypedValueRegion *R = SD->getRegion(); - for (auto I : *Regs) { - // FIXME: The logic to identify tainted regions could be more - // complete. For example, this would not currently identify - // overlapping fields in a union as tainted. To identify this we can - // check for overlapping/nested byte offsets. - if (Kind == I.second && R->isSubRegionOf(I.first)) - return true; - } - } - } - - // If memory region is tainted, data is also tainted. - if (const SymbolRegionValue *SRV = dyn_cast(*SI)) { - if (isTainted(SRV->getRegion(), Kind)) - return true; - } - - // If this is a SymbolCast from a tainted value, it's also tainted. - if (const SymbolCast *SC = dyn_cast(*SI)) { - if (isTainted(SC->getOperand(), Kind)) - return true; - } - } - - return false; -} diff --git a/lib/StaticAnalyzer/Core/TaintManager.cpp b/lib/StaticAnalyzer/Core/TaintManager.cpp deleted file mode 100644 index a37a3f78e0..0000000000 --- a/lib/StaticAnalyzer/Core/TaintManager.cpp +++ /dev/null @@ -1,22 +0,0 @@ -//== TaintManager.cpp ------------------------------------------ -*- C++ -*--=// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -#include "clang/StaticAnalyzer/Core/PathSensitive/TaintManager.h" - -using namespace clang; -using namespace ento; - -void *ProgramStateTrait::GDMIndex() { - static int index = 0; - return &index; -} - -void *ProgramStateTrait::GDMIndex() { - static int index; - return &index; -} diff --git a/test/Analysis/taint-dumps.c b/test/Analysis/taint-dumps.c new file mode 100644 index 0000000000..305e4e9888 --- /dev/null +++ b/test/Analysis/taint-dumps.c @@ -0,0 +1,14 @@ +// RUN: %clang_analyze_cc1 -analyzer-checker=alpha.security.taint\ +// RUN: -analyzer-checker=debug.ExprInspection %s\ +// RUN: 2>&1 | FileCheck %s + +void clang_analyzer_printState(); +int getchar(); + +// CHECK: Tainted symbols: +// CHECK-NEXT: conj_$2{{.*}} : 0 +int test_taint_dumps() { + int x = getchar(); + clang_analyzer_printState(); + return x; +} -- 2.40.0