From e64f311c11a8751867c2538807054f4817c1f5cb Mon Sep 17 00:00:00 2001 From: Jordy Rose Date: Mon, 16 Aug 2010 07:51:42 +0000 Subject: [PATCH] Model the effects of strcpy() and stpcpy() in CStringChecker. Other changes: - Fix memcpy() and friends to actually invalidate the destination buffer. - Emit a different message for out-of-bounds buffer accesses if the buffer is being written to. - When conjuring symbols, let ValueManager figure out the type. git-svn-id: https://llvm.org/svn/llvm-project/cfe/trunk@111120 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Checker/CStringChecker.cpp | 217 ++++++++++++++++++++++++++++++--- test/Analysis/bstring.c | 27 ++-- test/Analysis/string.c | 101 +++++++++++++++ 3 files changed, 320 insertions(+), 25 deletions(-) diff --git a/lib/Checker/CStringChecker.cpp b/lib/Checker/CStringChecker.cpp index bf3316fbda..0c0ccd6877 100644 --- a/lib/Checker/CStringChecker.cpp +++ b/lib/Checker/CStringChecker.cpp @@ -22,10 +22,11 @@ using namespace clang; namespace { class CStringChecker : public CheckerVisitor { - BugType *BT_Null, *BT_Bounds, *BT_Overlap, *BT_NotCString; + BugType *BT_Null, *BT_Bounds, *BT_BoundsWrite, *BT_Overlap, *BT_NotCString; public: CStringChecker() - : BT_Null(0), BT_Bounds(0), BT_Overlap(0), BT_NotCString(0) {} + : BT_Null(0), BT_Bounds(0), BT_BoundsWrite(0), BT_Overlap(0), BT_NotCString(0) + {} static void *getTag() { static int tag; return &tag; } bool EvalCallExpr(CheckerContext &C, const CallExpr *CE); @@ -52,15 +53,24 @@ public: void EvalStrlen(CheckerContext &C, const CallExpr *CE); + void EvalStrcpy(CheckerContext &C, const CallExpr *CE); + void EvalStpcpy(CheckerContext &C, const CallExpr *CE); + void EvalStrcpyCommon(CheckerContext &C, const CallExpr *CE, bool ReturnEnd); + // Utility methods std::pair AssumeZero(CheckerContext &C, const GRState *state, SVal V, QualType Ty); + const GRState *SetCStringLength(const GRState *state, const MemRegion *MR, + SVal StrLen); SVal GetCStringLengthForRegion(CheckerContext &C, const GRState *&state, const Expr *Ex, const MemRegion *MR); SVal GetCStringLength(CheckerContext &C, const GRState *&state, const Expr *Ex, SVal Buf); + const GRState *InvalidateBuffer(CheckerContext &C, const GRState *state, + const Expr *Ex, SVal V); + bool SummarizeRegion(llvm::raw_ostream& os, ASTContext& Ctx, const MemRegion *MR); @@ -68,11 +78,13 @@ public: const GRState *CheckNonNull(CheckerContext &C, const GRState *state, const Expr *S, SVal l); const GRState *CheckLocation(CheckerContext &C, const GRState *state, - const Expr *S, SVal l); + const Expr *S, SVal l, + bool IsDestination = false); const GRState *CheckBufferAccess(CheckerContext &C, const GRState *state, const Expr *Size, const Expr *FirstBuf, - const Expr *SecondBuf = NULL); + const Expr *SecondBuf = NULL, + bool FirstIsDestination = false); const GRState *CheckOverlap(CheckerContext &C, const GRState *state, const Expr *Size, const Expr *First, const Expr *Second); @@ -156,7 +168,8 @@ const GRState *CStringChecker::CheckNonNull(CheckerContext &C, // FIXME: This was originally copied from ArrayBoundChecker.cpp. Refactor? const GRState *CStringChecker::CheckLocation(CheckerContext &C, const GRState *state, - const Expr *S, SVal l) { + const Expr *S, SVal l, + bool IsDestination) { // If a previous check has failed, propagate the failure. if (!state) return NULL; @@ -189,17 +202,26 @@ const GRState *CStringChecker::CheckLocation(CheckerContext &C, if (!N) return NULL; - if (!BT_Bounds) - BT_Bounds = new BuiltinBug("Out-of-bound array access", - "Byte string function accesses out-of-bound array element " - "(buffer overflow)"); + BuiltinBug *BT; + if (IsDestination) { + if (!BT_BoundsWrite) { + BT_BoundsWrite = new BuiltinBug("Out-of-bound array access", + "Byte string function overflows destination buffer"); + } + BT = static_cast(BT_BoundsWrite); + } else { + if (!BT_Bounds) { + BT_Bounds = new BuiltinBug("Out-of-bound array access", + "Byte string function accesses out-of-bound array element"); + } + BT = static_cast(BT_Bounds); + } // FIXME: It would be nice to eventually make this diagnostic more clear, // e.g., by referencing the original declaration or by saying *why* this // reference is outside the range. // Generate a report for this bug. - BuiltinBug *BT = static_cast(BT_Bounds); RangedBugReport *report = new RangedBugReport(*BT, BT->getDescription(), N); report->addRange(S->getSourceRange()); @@ -216,7 +238,8 @@ const GRState *CStringChecker::CheckBufferAccess(CheckerContext &C, const GRState *state, const Expr *Size, const Expr *FirstBuf, - const Expr *SecondBuf) { + const Expr *SecondBuf, + bool FirstIsDestination) { // If a previous check has failed, propagate the failure. if (!state) return NULL; @@ -250,7 +273,7 @@ const GRState *CStringChecker::CheckBufferAccess(CheckerContext &C, if (Loc *BufLoc = dyn_cast(&BufStart)) { SVal BufEnd = SV.EvalBinOpLN(state, BinaryOperator::Add, *BufLoc, LastOffset, PtrTy); - state = CheckLocation(C, state, FirstBuf, BufEnd); + state = CheckLocation(C, state, FirstBuf, BufEnd, FirstIsDestination); // If the buffer isn't large enough, abort. if (!state) @@ -407,6 +430,42 @@ void CStringChecker::EmitOverlapBug(CheckerContext &C, const GRState *state, C.EmitReport(report); } +const GRState *CStringChecker::SetCStringLength(const GRState *state, + const MemRegion *MR, + SVal StrLen) { + assert(!StrLen.isUndef() && "Attempt to set an undefined string length"); + if (StrLen.isUnknown()) + return state; + + MR = MR->StripCasts(); + + switch (MR->getKind()) { + case MemRegion::StringRegionKind: + // FIXME: This can happen if we strcpy() into a string region. This is + // undefined [C99 6.4.5p6], but we should still warn about it. + return state; + + case MemRegion::SymbolicRegionKind: + case MemRegion::AllocaRegionKind: + case MemRegion::VarRegionKind: + case MemRegion::FieldRegionKind: + case MemRegion::ObjCIvarRegionKind: + return state->set(MR, StrLen); + + case MemRegion::ElementRegionKind: + // FIXME: Handle element regions by upper-bounding the parent region's + // string length. + return state; + + default: + // Other regions (mostly non-data) can't have a reliable C string length. + // For now, just ignore the change. + // FIXME: These are rare but not impossible. We should output some kind of + // warning for things like strcpy((char[]){'a', 0}, "b"); + return state; + } +} + SVal CStringChecker::GetCStringLengthForRegion(CheckerContext &C, const GRState *&state, const Expr *Ex, @@ -517,6 +576,37 @@ SVal CStringChecker::GetCStringLength(CheckerContext &C, const GRState *&state, } } +const GRState *CStringChecker::InvalidateBuffer(CheckerContext &C, + const GRState *state, + const Expr *E, SVal V) { + Loc *L = dyn_cast(&V); + if (!L) + return state; + + // FIXME: This is a simplified version of what's in CFRefCount.cpp -- it makes + // some assumptions about the value that CFRefCount can't. Even so, it should + // probably be refactored. + if (loc::MemRegionVal* MR = dyn_cast(L)) { + const MemRegion *R = MR->getRegion()->StripCasts(); + + // Are we dealing with an ElementRegion? If so, we should be invalidating + // the super-region. + if (const ElementRegion *ER = dyn_cast(R)) { + R = ER->getSuperRegion(); + // FIXME: What about layers of ElementRegions? + } + + // Invalidate this region. + unsigned Count = C.getNodeBuilder().getCurrentBlockCount(); + return state->InvalidateRegion(R, E, Count, NULL); + } + + // If we have a non-region value by chance, just remove the binding. + // FIXME: is this necessary or correct? This handles the non-Region + // cases. Is it ever valid to store to these? + return state->unbindLoc(*L); +} + bool CStringChecker::SummarizeRegion(llvm::raw_ostream& os, ASTContext& Ctx, const MemRegion *MR) { const TypedRegion *TR = dyn_cast(MR); @@ -577,11 +667,20 @@ void CStringChecker::EvalCopyCommon(CheckerContext &C, const GRState *state, // If the size can be nonzero, we have to check the other arguments. if (StNonZeroSize) { state = StNonZeroSize; - state = CheckBufferAccess(C, state, Size, Dest, Source); + state = CheckBufferAccess(C, state, Size, Dest, Source, + /* FirstIsDst = */ true); if (Restricted) state = CheckOverlap(C, state, Size, Dest, Source); - if (state) + + if (state) { + // Invalidate the destination. + // FIXME: Even if we can't perfectly model the copy, we should see if we + // can use LazyCompoundVals to copy the source values into the destination. + // This would probably remove any existing bindings past the end of the + // copied region, but that's still an improvement over blank invalidation. + state = InvalidateBuffer(C, state, Dest, state->getSVal(Dest)); C.addTransition(state); + } } } @@ -668,7 +767,7 @@ void CStringChecker::EvalMemcmp(CheckerContext &C, const CallExpr *CE) { if (state) { // The return value is the comparison result, which we don't know. unsigned Count = C.getNodeBuilder().getCurrentBlockCount(); - SVal CmpV = ValMgr.getConjuredSymbolVal(NULL, CE, CE->getType(), Count); + SVal CmpV = ValMgr.getConjuredSymbolVal(NULL, CE, Count); state = state->BindExpr(CE, CmpV); C.addTransition(state); } @@ -698,7 +797,7 @@ void CStringChecker::EvalStrlen(CheckerContext &C, const CallExpr *CE) { if (StrLen.isUnknown()) { ValueManager &ValMgr = C.getValueManager(); unsigned Count = C.getNodeBuilder().getCurrentBlockCount(); - StrLen = ValMgr.getConjuredSymbolVal(NULL, CE, CE->getType(), Count); + StrLen = ValMgr.getConjuredSymbolVal(NULL, CE, Count); } // Bind the return value. @@ -707,6 +806,90 @@ void CStringChecker::EvalStrlen(CheckerContext &C, const CallExpr *CE) { } } +void CStringChecker::EvalStrcpy(CheckerContext &C, const CallExpr *CE) { + // char *strcpy(char *restrict dst, const char *restrict src); + EvalStrcpyCommon(C, CE, /* ReturnEnd = */ false); +} + +void CStringChecker::EvalStpcpy(CheckerContext &C, const CallExpr *CE) { + // char *stpcpy(char *restrict dst, const char *restrict src); + EvalStrcpyCommon(C, CE, /* ReturnEnd = */ true); +} + +void CStringChecker::EvalStrcpyCommon(CheckerContext &C, const CallExpr *CE, + bool ReturnEnd) { + const GRState *state = C.getState(); + + // Check that the destination is non-null + const Expr *Dst = CE->getArg(0); + SVal DstVal = state->getSVal(Dst); + + state = CheckNonNull(C, state, Dst, DstVal); + if (!state) + return; + + // Check that the source is non-null. + const Expr *Src = CE->getArg(1); + SVal SrcVal = state->getSVal(Src); + + state = CheckNonNull(C, state, Src, SrcVal); + if (!state) + return; + + // Get the string length of the source. + SVal StrLen = GetCStringLength(C, state, Src, SrcVal); + + // If the source isn't a valid C string, give up. + if (StrLen.isUndef()) + return; + + SVal Result = (ReturnEnd ? UnknownVal() : DstVal); + + // If the destination is a MemRegion, try to check for a buffer overflow and + // record the new string length. + if (loc::MemRegionVal *DstRegVal = dyn_cast(&DstVal)) { + // If the length is known, we can check for an overflow. + if (NonLoc *KnownStrLen = dyn_cast(&StrLen)) { + SValuator &SV = C.getSValuator(); + + SVal LastElement = SV.EvalBinOpLN(state, BinaryOperator::Add, + *DstRegVal, *KnownStrLen, + Dst->getType()); + + state = CheckLocation(C, state, Dst, LastElement, /* IsDst = */ true); + if (!state) + return; + + // If this is a stpcpy-style copy, the last element is the return value. + if (ReturnEnd) + Result = LastElement; + } + + // Invalidate the destination. This must happen before we set the C string + // length because invalidation will clear the length. + // FIXME: Even if we can't perfectly model the copy, we should see if we + // can use LazyCompoundVals to copy the source values into the destination. + // This would probably remove any existing bindings past the end of the + // string, but that's still an improvement over blank invalidation. + state = InvalidateBuffer(C, state, Dst, *DstRegVal); + + // Set the C string length of the destination. + state = SetCStringLength(state, DstRegVal->getRegion(), StrLen); + } + + // If this is a stpcpy-style copy, but we were unable to check for a buffer + // overflow, we still need a result. Conjure a return value. + if (ReturnEnd && Result.isUnknown()) { + ValueManager &ValMgr = C.getValueManager(); + unsigned Count = C.getNodeBuilder().getCurrentBlockCount(); + StrLen = ValMgr.getConjuredSymbolVal(NULL, CE, Count); + } + + // Set the return value. + state = state->BindExpr(CE, Result); + C.addTransition(state); +} + //===----------------------------------------------------------------------===// // The driver method, and other Checker callbacks. //===----------------------------------------------------------------------===// @@ -730,6 +913,8 @@ bool CStringChecker::EvalCallExpr(CheckerContext &C, const CallExpr *CE) { .Cases("memcpy", "__memcpy_chk", &CStringChecker::EvalMemcpy) .Cases("memcmp", "bcmp", &CStringChecker::EvalMemcmp) .Cases("memmove", "__memmove_chk", &CStringChecker::EvalMemmove) + .Cases("strcpy", "__strcpy_chk", &CStringChecker::EvalStrcpy) + .Cases("stpcpy", "__stpcpy_chk", &CStringChecker::EvalStpcpy) .Case("strlen", &CStringChecker::EvalStrlen) .Case("bcopy", &CStringChecker::EvalBcopy) .Default(NULL); diff --git a/test/Analysis/bstring.c b/test/Analysis/bstring.c index ae9ba4f973..ffe420f725 100644 --- a/test/Analysis/bstring.c +++ b/test/Analysis/bstring.c @@ -48,27 +48,30 @@ void *memcpy(void *restrict s1, const void *restrict s2, size_t n); void memcpy0 () { char src[] = {1, 2, 3, 4}; - char dst[4]; + char dst[4] = {0}; memcpy(dst, src, 4); // no-warning if (memcpy(dst, src, 4) != dst) { (void)*(char*)0; // no-warning } + + if (dst[0] != 0) + (void)*(char*)0; // expected-warning{{null}} } void memcpy1 () { char src[] = {1, 2, 3, 4}; char dst[10]; - memcpy(dst, src, 5); // expected-warning{{out-of-bound}} + memcpy(dst, src, 5); // expected-warning{{Byte string function accesses out-of-bound array element}} } void memcpy2 () { char src[] = {1, 2, 3, 4}; char dst[1]; - memcpy(dst, src, 4); // expected-warning{{out-of-bound}} + memcpy(dst, src, 4); // expected-warning{{Byte string function overflows destination buffer}} } void memcpy3 () { @@ -82,14 +85,14 @@ void memcpy4 () { char src[] = {1, 2, 3, 4}; char dst[10]; - memcpy(dst+2, src+2, 3); // expected-warning{{out-of-bound}} + memcpy(dst+2, src+2, 3); // expected-warning{{Byte string function accesses out-of-bound array element}} } void memcpy5() { char src[] = {1, 2, 3, 4}; char dst[3]; - memcpy(dst+2, src+2, 2); // expected-warning{{out-of-bound}} + memcpy(dst+2, src+2, 2); // expected-warning{{Byte string function overflows destination buffer}} } void memcpy6() { @@ -150,13 +153,16 @@ void *memmove(void *s1, const void *s2, size_t n); void memmove0 () { char src[] = {1, 2, 3, 4}; - char dst[4]; + char dst[4] = {0}; memmove(dst, src, 4); // no-warning if (memmove(dst, src, 4) != dst) { (void)*(char*)0; // no-warning } + + if (dst[0] != 0) + (void)*(char*)0; // expected-warning{{null}} } void memmove1 () { @@ -170,7 +176,7 @@ void memmove2 () { char src[] = {1, 2, 3, 4}; char dst[1]; - memmove(dst, src, 4); // expected-warning{{out-of-bound}} + memmove(dst, src, 4); // expected-warning{{overflow}} } //===----------------------------------------------------------------------=== @@ -263,9 +269,12 @@ void bcopy(/*const*/ void *s1, void *s2, size_t n); void bcopy0 () { char src[] = {1, 2, 3, 4}; - char dst[4]; + char dst[4] = {0}; bcopy(src, dst, 4); // no-warning + + if (dst[0] != 0) + (void)*(char*)0; // expected-warning{{null}} } void bcopy1 () { @@ -279,5 +288,5 @@ void bcopy2 () { char src[] = {1, 2, 3, 4}; char dst[1]; - bcopy(src, dst, 4); // expected-warning{{out-of-bound}} + bcopy(src, dst, 4); // expected-warning{{overflow}} } diff --git a/test/Analysis/string.c b/test/Analysis/string.c index af43c4b03c..35ed7106f7 100644 --- a/test/Analysis/string.c +++ b/test/Analysis/string.c @@ -24,6 +24,7 @@ # define BUILTIN(f) f #endif /* USE_BUILTINS */ +#define NULL 0 typedef typeof(sizeof(int)) size_t; //===----------------------------------------------------------------------=== @@ -137,3 +138,103 @@ void strlen_liveness(const char *x) { if (strlen(x) < 5) (void)*(char*)0; // no-warning } + +//===----------------------------------------------------------------------=== +// strcpy() +//===----------------------------------------------------------------------=== + +#ifdef VARIANT + +#define __strcpy_chk BUILTIN(__strcpy_chk) +char *__strcpy_chk(char *restrict s1, const char *restrict s2, size_t destlen); + +#define strcpy(a,b) __strcpy_chk(a,b,(size_t)-1) + +#else /* VARIANT */ + +#define strcpy BUILTIN(strcpy) +char *strcpy(char *restrict s1, const char *restrict s2); + +#endif /* VARIANT */ + + +void strcpy_null_dst(char *x) { + strcpy(NULL, x); // expected-warning{{Null pointer argument in call to byte string function}} +} + +void strcpy_null_src(char *x) { + strcpy(x, NULL); // expected-warning{{Null pointer argument in call to byte string function}} +} + +void strcpy_fn(char *x) { + strcpy(x, (char*)&strcpy_fn); // expected-warning{{Argument to byte string function is the address of the function 'strcpy_fn', which is not a null-terminated string}} +} + +void strcpy_effects(char *x, char *y) { + char a = x[0]; + + if (strcpy(x, y) != x) + (void)*(char*)0; // no-warning + + if (strlen(x) != strlen(y)) + (void)*(char*)0; // no-warning + + if (a != x[0]) + (void)*(char*)0; // expected-warning{{null}} +} + +void strcpy_overflow(char *y) { + char x[4]; + if (strlen(y) == 4) + strcpy(x, y); // expected-warning{{Byte string function overflows destination buffer}} +} + +void strcpy_no_overflow(char *y) { + char x[4]; + if (strlen(y) == 3) + strcpy(x, y); // no-warning +} + +//===----------------------------------------------------------------------=== +// stpcpy() +//===----------------------------------------------------------------------=== + +#ifdef VARIANT + +#define __stpcpy_chk BUILTIN(__stpcpy_chk) +char *__stpcpy_chk(char *restrict s1, const char *restrict s2, size_t destlen); + +#define stpcpy(a,b) __stpcpy_chk(a,b,(size_t)-1) + +#else /* VARIANT */ + +#define stpcpy BUILTIN(stpcpy) +char *stpcpy(char *restrict s1, const char *restrict s2); + +#endif /* VARIANT */ + + +void stpcpy_effect(char *x, char *y) { + char a = x[0]; + + if (stpcpy(x, y) != &x[strlen(y)]) + (void)*(char*)0; // no-warning + + if (strlen(x) != strlen(y)) + (void)*(char*)0; // no-warning + + if (a != x[0]) + (void)*(char*)0; // expected-warning{{null}} +} + +void stpcpy_overflow(char *y) { + char x[4]; + if (strlen(y) == 4) + stpcpy(x, y); // expected-warning{{Byte string function overflows destination buffer}} +} + +void stpcpy_no_overflow(char *y) { + char x[4]; + if (strlen(y) == 3) + stpcpy(x, y); // no-warning +} -- 2.40.0