From: Anna Zaks Date: Wed, 18 Jan 2012 02:45:07 +0000 (+0000) Subject: [analyzer] Taint: add taint propagation rules for string and memory copy X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=9b0c749a20d0f7d0e63441d76baa15def3f37fdb;p=clang [analyzer] Taint: add taint propagation rules for string and memory copy functions. git-svn-id: https://llvm.org/svn/llvm-project/cfe/trunk@148370 91177308-0d34-0410-b5e6-96231b3b80d8 --- diff --git a/include/clang/StaticAnalyzer/Core/PathSensitive/CheckerContext.h b/include/clang/StaticAnalyzer/Core/PathSensitive/CheckerContext.h index b86f4e119d..0c51ac538f 100644 --- a/include/clang/StaticAnalyzer/Core/PathSensitive/CheckerContext.h +++ b/include/clang/StaticAnalyzer/Core/PathSensitive/CheckerContext.h @@ -149,7 +149,17 @@ public: const FunctionDecl *getCalleeDecl(const CallExpr *CE) const; /// \brief Get the name of the called function (path-sensitive). - StringRef getCalleeName(const CallExpr *CE) const; + StringRef getCalleeName(const FunctionDecl *FunDecl) const; + + /// \brief Get the name of the called function (path-sensitive). + StringRef getCalleeName(const CallExpr *CE) const { + const FunctionDecl *FunDecl = getCalleeDecl(CE); + return getCalleeName(FunDecl); + } + + /// Given a function declaration and a name checks if this is a C lib + /// function with the given name. + bool isCLibraryFunction(const FunctionDecl *FD, StringRef Name); private: ExplodedNode *addTransitionImpl(const ProgramState *State, diff --git a/lib/StaticAnalyzer/Checkers/GenericTaintChecker.cpp b/lib/StaticAnalyzer/Checkers/GenericTaintChecker.cpp index 37d80ade48..52e01b619c 100644 --- a/lib/StaticAnalyzer/Checkers/GenericTaintChecker.cpp +++ b/lib/StaticAnalyzer/Checkers/GenericTaintChecker.cpp @@ -20,6 +20,7 @@ #include "clang/StaticAnalyzer/Core/PathSensitive/CheckerContext.h" #include "clang/StaticAnalyzer/Core/PathSensitive/ProgramStateTrait.h" #include "clang/StaticAnalyzer/Core/BugReporter/BugType.h" +#include "clang/Basic/Builtins.h" #include using namespace clang; @@ -41,7 +42,10 @@ private: static const unsigned InvalidArgIndex = UINT_MAX - 1; mutable llvm::OwningPtr BT; - void initBugType() const; + inline void initBugType() const { + if (!BT) + BT.reset(new BugType("Taint Analysis", "General")); + } /// \brief Catch taint related bugs. Check if tainted data is passed to a /// system call etc. @@ -78,9 +82,6 @@ private: /// Taint the scanned input if the file is tainted. const ProgramState *preFscanf(const CallExpr *CE, CheckerContext &C) const; - /// Taint if any of the arguments are tainted. - const ProgramState *preAnyArgs(const CallExpr *CE, CheckerContext &C) const; - const ProgramState *preStrcpy(const CallExpr *CE, CheckerContext &C) const; /// Check if the region the expression evaluates to is the standard input, /// and thus, is tainted. @@ -119,18 +120,42 @@ private: ArgVector SrcArgs; /// List of arguments which should be tainted on function return. ArgVector DstArgs; + // TODO: Check if using other data structures would be more optimal. TaintPropagationRule() {} - TaintPropagationRule(unsigned SArg, unsigned DArg) { + TaintPropagationRule(unsigned SArg, + unsigned DArg, bool TaintRet = false) { SrcArgs.push_back(SArg); DstArgs.push_back(DArg); + if (TaintRet) + DstArgs.push_back(ReturnValueIndex); + } + + TaintPropagationRule(unsigned SArg1, unsigned SArg2, + unsigned DArg, bool TaintRet = false) { + SrcArgs.push_back(SArg1); + SrcArgs.push_back(SArg2); + DstArgs.push_back(DArg); + if (TaintRet) + DstArgs.push_back(ReturnValueIndex); } + /// Get the propagation rule for a given function. + static TaintPropagationRule + getTaintPropagationRule(const FunctionDecl *FDecl, + StringRef Name, + CheckerContext &C); + inline void addSrcArg(unsigned A) { SrcArgs.push_back(A); } inline void addDstArg(unsigned A) { DstArgs.push_back(A); } - inline bool isNull() { return SrcArgs.empty(); } + inline bool isNull() const { return SrcArgs.empty(); } + + inline bool isDestinationArgument(unsigned ArgNum) const { + return (std::find(DstArgs.begin(), + DstArgs.end(), ArgNum) != DstArgs.end()); + } }; /// \brief Pre-process a function which propagates taint according to the @@ -141,14 +166,16 @@ private: }; -// TODO: We probably could use TableGen here. + +const unsigned GenericTaintChecker::ReturnValueIndex; +const unsigned GenericTaintChecker::InvalidArgIndex; + const char GenericTaintChecker::MsgUncontrolledFormatString[] = "Tainted format string (CWE-134: Uncontrolled Format String)"; const char GenericTaintChecker::MsgSanitizeSystemArgs[] = "Tainted data passed to a system call " "(CERT/STR02-C. Sanitize data passed to complex subsystems)"; - } /// A set which is used to pass information from call pre-visit instruction @@ -163,9 +190,67 @@ template<> struct ProgramStateTrait }; }} -inline void GenericTaintChecker::initBugType() const { - if (!BT) - BT.reset(new BugType("Taint Analysis", "General")); +GenericTaintChecker::TaintPropagationRule +GenericTaintChecker::TaintPropagationRule::getTaintPropagationRule( + const FunctionDecl *FDecl, + StringRef Name, + CheckerContext &C) { + // Check for exact name match for functions without builtin substitutes. + TaintPropagationRule Rule = llvm::StringSwitch(Name) + .Case("atoi", TaintPropagationRule(0, ReturnValueIndex)) + .Case("atol", TaintPropagationRule(0, ReturnValueIndex)) + .Case("atoll", TaintPropagationRule(0, ReturnValueIndex)) + .Default(TaintPropagationRule()); + + if (!Rule.isNull()) + return Rule; + + // Check if it's one of the memory setting/copying functions. + // This check is specialized but faster then calling isCLibraryFunction. + unsigned BId = 0; + if ( (BId = FDecl->getMemoryFunctionKind()) ) + switch(BId) { + case Builtin::BImemcpy: + case Builtin::BImemmove: + case Builtin::BIstrncpy: + case Builtin::BIstrncat: + return TaintPropagationRule(1, 2, 0, true); + break; + case Builtin::BIstrlcpy: + case Builtin::BIstrlcat: + return TaintPropagationRule(1, 2, 0, false); + break; + case Builtin::BIstrndup: + return TaintPropagationRule(0, 1, ReturnValueIndex); + break; + + default: + break; + }; + + // Process all other functions which could be defined as builtins. + if (Rule.isNull()) { + if (C.isCLibraryFunction(FDecl, "snprintf") || + C.isCLibraryFunction(FDecl, "sprintf")) + return TaintPropagationRule(InvalidArgIndex, 0, true); + else if (C.isCLibraryFunction(FDecl, "strcpy") || + C.isCLibraryFunction(FDecl, "stpcpy") || + C.isCLibraryFunction(FDecl, "strcat")) + return TaintPropagationRule(1, 0, true); + else if (C.isCLibraryFunction(FDecl, "bcopy")) + return TaintPropagationRule(0, 2, 1, false); + else if (C.isCLibraryFunction(FDecl, "strdup") || + C.isCLibraryFunction(FDecl, "strdupa")) + return TaintPropagationRule(0, ReturnValueIndex); + else if (C.isCLibraryFunction(FDecl, "strndupa")) + return TaintPropagationRule(0, 1, ReturnValueIndex); + } + + // Skipping the following functions, since they might be used for cleansing + // or smart memory copy: + // - memccpy - copying untill hitting a special character. + + return TaintPropagationRule(); } void GenericTaintChecker::checkPreStmt(const CallExpr *CE, @@ -187,41 +272,34 @@ void GenericTaintChecker::checkPostStmt(const CallExpr *CE, void GenericTaintChecker::addSourcesPre(const CallExpr *CE, CheckerContext &C) const { - // Set the evaluation function by switching on the callee name. - StringRef Name = C.getCalleeName(CE); + const ProgramState *State = 0; + const FunctionDecl *FDecl = C.getCalleeDecl(CE); + StringRef Name = C.getCalleeName(FDecl); if (Name.empty()) return; - const ProgramState *State = 0; - - TaintPropagationRule Rule = llvm::StringSwitch(Name) - .Case("atoi", TaintPropagationRule(0, ReturnValueIndex)) - .Case("atol", TaintPropagationRule(0, ReturnValueIndex)) - .Case("atoll", TaintPropagationRule(0, ReturnValueIndex)) - .Default(TaintPropagationRule()); - + // First, try generating a propagation rule for this function. + TaintPropagationRule Rule = + TaintPropagationRule::getTaintPropagationRule(FDecl, Name, C); if (!Rule.isNull()) { State = prePropagateTaint(CE, C, Rule); if (!State) return; C.addTransition(State); + return; } + // Otherwise, check if we have custom pre-processing implemented. FnCheck evalFunction = llvm::StringSwitch(Name) .Case("fscanf", &GenericTaintChecker::preFscanf) - .Cases("strcpy", "__builtin___strcpy_chk", - "__inline_strcpy_chk", &GenericTaintChecker::preStrcpy) - .Cases("stpcpy", "__builtin___stpcpy_chk", &GenericTaintChecker::preStrcpy) - .Cases("strncpy", "__builtin___strncpy_chk", &GenericTaintChecker::preStrcpy) .Default(0); - // Check and evaluate the call. if (evalFunction) State = (this->*evalFunction)(CE, C); if (!State) return; - C.addTransition(State); + } bool GenericTaintChecker::propagateFromPre(const CallExpr *CE, @@ -348,10 +426,14 @@ GenericTaintChecker::prePropagateTaint(const CallExpr *CE, unsigned ArgNum = *I; if (ArgNum == InvalidArgIndex) { - // Check if any of the arguments is tainted. - for (unsigned int i = 0; i < CE->getNumArgs(); ++i) + // Check if any of the arguments is tainted, but skip the + // destination arguments. + for (unsigned int i = 0; i < CE->getNumArgs(); ++i) { + if (PR.isDestinationArgument(i)) + continue; if ((IsTainted = isTaintedOrPointsToTainted(CE->getArg(i), State, C))) break; + } break; } @@ -419,30 +501,6 @@ const ProgramState *GenericTaintChecker::preFscanf(const CallExpr *CE, return 0; } -// If any arguments are tainted, mark the return value as tainted on post-visit. -const ProgramState * GenericTaintChecker::preAnyArgs(const CallExpr *CE, - CheckerContext &C) const { - for (unsigned int i = 0; i < CE->getNumArgs(); ++i) { - const ProgramState *State = C.getState(); - const Expr *Arg = CE->getArg(i); - if (State->isTainted(Arg, C.getLocationContext()) || - State->isTainted(getPointedToSymbol(C, Arg))) - return State = State->add(ReturnValueIndex); - } - return 0; -} - -const ProgramState * GenericTaintChecker::preStrcpy(const CallExpr *CE, - CheckerContext &C) const { - assert(CE->getNumArgs() >= 2); - const Expr *FromArg = CE->getArg(1); - const ProgramState *State = C.getState(); - if (State->isTainted(FromArg, C.getLocationContext()) || - State->isTainted(getPointedToSymbol(C, FromArg))) - return State = State->add(0); - return 0; -} - const ProgramState *GenericTaintChecker::postScanf(const CallExpr *CE, CheckerContext &C) const { const ProgramState *State = C.getState(); diff --git a/lib/StaticAnalyzer/Core/CheckerContext.cpp b/lib/StaticAnalyzer/Core/CheckerContext.cpp index 3737ca5467..cb272fb1c3 100644 --- a/lib/StaticAnalyzer/Core/CheckerContext.cpp +++ b/lib/StaticAnalyzer/Core/CheckerContext.cpp @@ -13,6 +13,8 @@ //===----------------------------------------------------------------------===// #include "clang/StaticAnalyzer/Core/PathSensitive/CheckerContext.h" +#include "clang/Basic/Builtins.h" + using namespace clang; using namespace ento; @@ -23,12 +25,31 @@ const FunctionDecl *CheckerContext::getCalleeDecl(const CallExpr *CE) const { return L.getAsFunctionDecl(); } -StringRef CheckerContext::getCalleeName(const CallExpr *CE) const { - const FunctionDecl *funDecl = getCalleeDecl(CE); - if (!funDecl) +StringRef CheckerContext::getCalleeName(const FunctionDecl *FunDecl) const { + if (!FunDecl) return StringRef(); - IdentifierInfo *funI = funDecl->getIdentifier(); + IdentifierInfo *funI = FunDecl->getIdentifier(); if (!funI) return StringRef(); return funI->getName(); } + + +bool CheckerContext::isCLibraryFunction(const FunctionDecl *FD, + StringRef Name){ + // To avoid false positives (Ex: finding user defined functions with + // similar names), only perform fuzzy name matching when it's a builtin. + // Using a string compare is slow, we might want to switch on BuiltinID here. + unsigned BId = FD->getBuiltinID(); + if (BId != 0) { + ASTContext &Context = getASTContext(); + StringRef BName = Context.BuiltinInfo.GetName(BId); + if (StringRef(BName).find(Name) != StringRef::npos) + return true; + } + + if (FD->isExternC() && FD->getIdentifier()->getName().equals(Name)) + return true; + + return false; +} diff --git a/test/Analysis/taint-generic.c b/test/Analysis/taint-generic.c index 0f8996674c..2d00f4daa9 100644 --- a/test/Analysis/taint-generic.c +++ b/test/Analysis/taint-generic.c @@ -22,6 +22,7 @@ static char *__inline_strcpy_chk (char *dest, const char *src) { } char *stpcpy(char *restrict s1, const char *restrict s2); char *strncpy( char * destination, const char * source, size_t num ); +char *strndup(const char *s, size_t n); #define BUFSIZE 10 @@ -86,9 +87,18 @@ void testUncontrolledFormatString(char **p) { stpcpy(spcpy, s); setproctitle(spcpy, 3); // expected-warning {{Uncontrolled Format String}} + char *spcpyret; + spcpyret = stpcpy(spcpy, s); + setproctitle(spcpyret, 3); // expected-warning {{Uncontrolled Format String}} + char sncpy[80]; strncpy(sncpy, s, 20); setproctitle(sncpy, 3); // expected-warning {{Uncontrolled Format String}} + + char *dup; + dup = strndup(s, 20); + setproctitle(dup, 3); // expected-warning {{Uncontrolled Format String}} + } int system(const char *command); @@ -97,4 +107,24 @@ void testTaintSystemCall() { char addr[128]; scanf("%s", addr); system(addr); // expected-warning {{Tainted data passed to a system call}} + + // Test that spintf transfers taint. + sprintf(buffer, "/bin/mail %s < /tmp/email", addr); + system(buffer); // expected-warning {{Tainted data passed to a system call}} +} +void testTaintSystemCall2() { + // Test that snpintf transfers taint. + char buffern[156]; + char addr[128]; + scanf("%s", addr); + __builtin_snprintf(buffern, 10, "/bin/mail %s < /tmp/email", addr); + system(buffern); // expected-warning {{Tainted data passed to a system call}} +} +void testTaintSystemCall3() { + char buffern2[156]; + int numt; + char addr[128]; + scanf("%s %d", addr, &numt); + __builtin_snprintf(buffern2, numt, "/bin/mail %s < /tmp/email", "abcd"); + system(buffern2); // expected-warning {{Tainted data passed to a system call}} }