From: Anna Zaks Date: Thu, 9 Mar 2017 00:01:16 +0000 (+0000) Subject: [analyzer] Extend taint propagation and checking to support LazyCompoundVal X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=de95e0942e95c8cc8dac7725c3202cd95478c0aa;p=clang [analyzer] Extend taint propagation and checking to support LazyCompoundVal A patch by Vlad Tsyrklevich! Differential Revision: https://reviews.llvm.org/D28445 git-svn-id: https://llvm.org/svn/llvm-project/cfe/trunk@297326 91177308-0d34-0410-b5e6-96231b3b80d8 --- diff --git a/include/clang/StaticAnalyzer/Core/PathSensitive/Store.h b/include/clang/StaticAnalyzer/Core/PathSensitive/Store.h index fa7d3f72ab..7fa7515bf2 100644 --- a/include/clang/StaticAnalyzer/Core/PathSensitive/Store.h +++ b/include/clang/StaticAnalyzer/Core/PathSensitive/Store.h @@ -59,6 +59,30 @@ public: /// \return The value bound to the location \c loc. virtual SVal getBinding(Store store, Loc loc, QualType T = QualType()) = 0; + /// Return the default value bound to a region in a given store. The default + /// binding is the value of sub-regions that were not initialized separately + /// from their base region. For example, if the structure is zero-initialized + /// upon construction, this method retrieves the concrete zero value, even if + /// some or all fields were later overwritten manually. Default binding may be + /// an unknown, undefined, concrete, or symbolic value. + /// \param[in] store The store in which to make the lookup. + /// \param[in] R The region to find the default binding for. + /// \return The default value bound to the region in the store, if a default + /// binding exists. + virtual Optional getDefaultBinding(Store store, const MemRegion *R) = 0; + + /// Return the default value bound to a LazyCompoundVal. The default binding + /// is used to represent the value of any fields or elements within the + /// structure represented by the LazyCompoundVal which were not initialized + /// explicitly separately from the whole structure. Default binding may be an + /// unknown, undefined, concrete, or symbolic value. + /// \param[in] lcv The lazy compound value. + /// \return The default value bound to the LazyCompoundVal \c lcv, if a + /// default binding exists. + Optional getDefaultBinding(nonloc::LazyCompoundVal lcv) { + return getDefaultBinding(lcv.getStore(), lcv.getRegion()); + } + /// Return a state with the specified value bound to the given location. /// \param[in] store The analysis state. /// \param[in] loc The symbolic memory location. diff --git a/lib/StaticAnalyzer/Checkers/GenericTaintChecker.cpp b/lib/StaticAnalyzer/Checkers/GenericTaintChecker.cpp index af334c4143..b1a54e7795 100644 --- a/lib/StaticAnalyzer/Checkers/GenericTaintChecker.cpp +++ b/lib/StaticAnalyzer/Checkers/GenericTaintChecker.cpp @@ -65,6 +65,18 @@ private: /// and thus, is tainted. static bool isStdin(const Expr *E, CheckerContext &C); + /// This is called from getPointedToSymbol() to resolve symbol references for + /// the region underlying a LazyCompoundVal. This is the default binding + /// for the LCV, which could be a conjured symbol from a function call that + /// initialized the region. It only returns the conjured symbol if the LCV + /// covers the entire region, e.g. we avoid false positives by not returning + /// a default bindingc for an entire struct if the symbol for only a single + /// field or element within it is requested. + // TODO: Return an appropriate symbol for sub-fields/elements of an LCV so + // that they are also appropriately tainted. + static SymbolRef getLCVSymbol(CheckerContext &C, + nonloc::LazyCompoundVal &LCV); + /// \brief Given a pointer argument, get the symbol of the value it contains /// (points to). static SymbolRef getPointedToSymbol(CheckerContext &C, const Expr *Arg); @@ -461,6 +473,27 @@ bool GenericTaintChecker::checkPre(const CallExpr *CE, CheckerContext &C) const{ return false; } +SymbolRef GenericTaintChecker::getLCVSymbol(CheckerContext &C, + nonloc::LazyCompoundVal &LCV) { + StoreManager &StoreMgr = C.getStoreManager(); + + // getLCVSymbol() is reached in a PostStmt so we can always expect a default + // binding to exist if one is present. + if (Optional binding = StoreMgr.getDefaultBinding(LCV)) { + SymbolRef Sym = binding->getAsSymbol(); + if (!Sym) + return nullptr; + + // If the LCV covers an entire base region return the default conjured symbol. + if (LCV.getRegion() == LCV.getRegion()->getBaseRegion()) + return Sym; + } + + // Otherwise, return a nullptr as there's not yet a functional way to taint + // sub-regions of LCVs. + return nullptr; +} + SymbolRef GenericTaintChecker::getPointedToSymbol(CheckerContext &C, const Expr* Arg) { ProgramStateRef State = C.getState(); @@ -476,6 +509,10 @@ SymbolRef GenericTaintChecker::getPointedToSymbol(CheckerContext &C, dyn_cast(Arg->getType().getCanonicalType().getTypePtr()); SVal Val = State->getSVal(*AddrLoc, ArgTy ? ArgTy->getPointeeType(): QualType()); + + if (auto LCV = Val.getAs()) + return getLCVSymbol(C, *LCV); + return Val.getAsSymbol(); } diff --git a/lib/StaticAnalyzer/Core/RegionStore.cpp b/lib/StaticAnalyzer/Core/RegionStore.cpp index 934cc5cd3a..f0c2df4627 100644 --- a/lib/StaticAnalyzer/Core/RegionStore.cpp +++ b/lib/StaticAnalyzer/Core/RegionStore.cpp @@ -494,6 +494,11 @@ public: // Part of public interface to class. return getBinding(getRegionBindings(S), L, T); } + Optional getDefaultBinding(Store S, const MemRegion *R) override { + RegionBindingsRef B = getRegionBindings(S); + return B.getDefaultBinding(R); + } + SVal getBinding(RegionBindingsConstRef B, Loc L, QualType T = QualType()); SVal getBindingForElement(RegionBindingsConstRef B, const ElementRegion *R); diff --git a/test/Analysis/taint-generic.c b/test/Analysis/taint-generic.c index 8e8226b0e8..8efed66dac 100644 --- a/test/Analysis/taint-generic.c +++ b/test/Analysis/taint-generic.c @@ -169,6 +169,43 @@ void testSocket() { sock = socket(AF_LOCAL, SOCK_STREAM, 0); read(sock, buffer, 100); execl(buffer, "filename", 0); // no-warning + + sock = socket(AF_INET, SOCK_STREAM, 0); + // References to both buffer and &buffer as an argument should taint the argument + read(sock, &buffer, 100); + execl(buffer, "filename", 0); // expected-warning {{Untrusted data is passed to a system call}} +} + +void testStruct() { + struct { + char buf[16]; + int length; + } tainted; + + char buffer[16]; + int sock; + + sock = socket(AF_INET, SOCK_STREAM, 0); + read(sock, &tainted, sizeof(tainted)); + __builtin_memcpy(buffer, tainted.buf, tainted.length); // expected-warning {{Untrusted data is used to specify the buffer size}} +} + +void testStructArray() { + struct { + char buf[16]; + struct { + int length; + } st[1]; + } tainted; + + char buffer[16]; + int sock; + + sock = socket(AF_INET, SOCK_STREAM, 0); + read(sock, &tainted.buf[0], sizeof(tainted.buf)); + read(sock, &tainted.st[0], sizeof(tainted.st)); + // FIXME: tainted.st[0].length should be marked tainted + __builtin_memcpy(buffer, tainted.buf, tainted.st[0].length); // no-warning } int testDivByZero() {